From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 81F556DE0BB9 for ; Wed, 10 May 2017 04:39:22 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.005 X-Spam-Level: X-Spam-Status: No, score=-0.005 tagged_above=-999 required=5 tests=[AWL=0.006, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id T1kQ_SLv6SbH for ; Wed, 10 May 2017 04:39:20 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 037426DE0BB6 for ; Wed, 10 May 2017 04:39:19 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.84_2) (envelope-from ) id 1d8Pwo-0000fR-De; Wed, 10 May 2017 07:38:26 -0400 Received: (nullmailer pid 28573 invoked by uid 1000); Wed, 10 May 2017 11:39:15 -0000 From: David Bremner To: notmuch@freelists.org, notmuch@notmuchmail.org Subject: [PATCH 4/6] lib/index: generalize filter name Date: Wed, 10 May 2017 08:39:08 -0300 Message-Id: <20170510113910.28444-5-david@tethera.net> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20170510113910.28444-1-david@tethera.net> References: <20170510113910.28444-1-david@tethera.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 10 May 2017 11:39:22 -0000 We can't very well call it uuencode if it is going to filter other things as well. 
--- lib/index.cc | 92 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/lib/index.cc b/lib/index.cc index 02b35b81..3bb1ac1c 100644 --- a/lib/index.cc +++ b/lib/index.cc @@ -26,8 +26,8 @@ /* Oh, how I wish that gobject didn't require so much noisy boilerplate! * (Though I have at least eliminated some of the stock set...) */ -typedef struct _NotmuchFilterDiscardUuencode NotmuchFilterDiscardUuencode; -typedef struct _NotmuchFilterDiscardUuencodeClass NotmuchFilterDiscardUuencodeClass; +typedef struct _NotmuchFilterDiscardNonTerms NotmuchFilterDiscardNonTerms; +typedef struct _NotmuchFilterDiscardNonTermsClass NotmuchFilterDiscardNonTermsClass; typedef void (*filter_fun) (GMimeFilter *filter, char *in, size_t len, size_t prespace, char **out, size_t *outlen, size_t *outprespace); @@ -41,44 +41,29 @@ typedef struct { } scanner_state_t; /** - * NotmuchFilterDiscardUuencode: + * NotmuchFilterDiscardNonTerms: * * @parent_object: parent #GMimeFilter * @encode: encoding vs decoding * @state: State of the parser * - * A filter to discard uuencoded portions of an email. - * - * A uuencoded portion is identified as beginning with a line - * matching: - * - * begin [0-7][0-7][0-7] .* - * - * After that detection, and beginning with the following line, - * characters will be discarded as long as the first character of each - * line begins with M and subsequent characters on the line are within - * the range of ASCII characters from ' ' to '`'. - * - * This is not a perfect UUencode filter. It's possible to have a - * message that will legitimately match that pattern, (so that some - * legitimate content is discarded). And for most UUencoded files, the - * final line of encoded data (the line not starting with M) will be - * indexed. + * A filter to discard non-term portions of an email, i.e. stuff not + * worth indexing. 
**/ -struct _NotmuchFilterDiscardUuencode { +struct _NotmuchFilterDiscardNonTerms { GMimeFilter parent_object; GMimeContentType *content_type; filter_fun real_filter; int state; }; -struct _NotmuchFilterDiscardUuencodeClass { +struct _NotmuchFilterDiscardNonTermsClass { GMimeFilterClass parent_class; }; -static GMimeFilter *notmuch_filter_discard_uuencode_new (GMimeContentType *content); +static GMimeFilter *notmuch_filter_discard_non_terms_new (GMimeContentType *content); -static void notmuch_filter_discard_uuencode_finalize (GObject *object); +static void notmuch_filter_discard_non_terms_finalize (GObject *object); static GMimeFilter *filter_copy (GMimeFilter *filter); static void filter_filter (GMimeFilter *filter, char *in, size_t len, size_t prespace, @@ -91,14 +76,14 @@ static void filter_reset (GMimeFilter *filter); static GMimeFilterClass *parent_class = NULL; static void -notmuch_filter_discard_uuencode_class_init (NotmuchFilterDiscardUuencodeClass *klass) +notmuch_filter_discard_non_terms_class_init (NotmuchFilterDiscardNonTermsClass *klass) { GObjectClass *object_class = G_OBJECT_CLASS (klass); GMimeFilterClass *filter_class = GMIME_FILTER_CLASS (klass); parent_class = (GMimeFilterClass *) g_type_class_ref (GMIME_TYPE_FILTER); - object_class->finalize = notmuch_filter_discard_uuencode_finalize; + object_class->finalize = notmuch_filter_discard_non_terms_finalize; filter_class->copy = filter_copy; filter_class->filter = filter_filter; @@ -107,7 +92,7 @@ notmuch_filter_discard_uuencode_class_init (NotmuchFilterDiscardUuencodeClass *k } static void -notmuch_filter_discard_uuencode_finalize (GObject *object) +notmuch_filter_discard_non_terms_finalize (GObject *object) { G_OBJECT_CLASS (parent_class)->finalize (object); } @@ -115,15 +100,15 @@ notmuch_filter_discard_uuencode_finalize (GObject *object) static GMimeFilter * filter_copy (GMimeFilter *gmime_filter) { - NotmuchFilterDiscardUuencode *filter = (NotmuchFilterDiscardUuencode *) gmime_filter; + 
NotmuchFilterDiscardNonTerms *filter = (NotmuchFilterDiscardNonTerms *) gmime_filter; - return notmuch_filter_discard_uuencode_new (filter->content_type); + return notmuch_filter_discard_non_terms_new (filter->content_type); } static void filter_filter (GMimeFilter *gmime_filter, char *inbuf, size_t inlen, size_t prespace, char **outbuf, size_t *outlen, size_t *outprespace) { - NotmuchFilterDiscardUuencode *filter = (NotmuchFilterDiscardUuencode *) gmime_filter; + NotmuchFilterDiscardNonTerms *filter = (NotmuchFilterDiscardNonTerms *) gmime_filter; (*filter->real_filter)(gmime_filter, inbuf, inlen, prespace, outbuf, outlen, outprespace); } @@ -133,7 +118,7 @@ do_filter (const scanner_state_t states[], GMimeFilter *gmime_filter, char *inbuf, size_t inlen, size_t prespace, char **outbuf, size_t *outlen, size_t *outprespace) { - NotmuchFilterDiscardUuencode *filter = (NotmuchFilterDiscardUuencode *) gmime_filter; + NotmuchFilterDiscardNonTerms *filter = (NotmuchFilterDiscardNonTerms *) gmime_filter; register const char *inptr = inbuf; const char *inend = inbuf + inlen; char *outptr; @@ -167,6 +152,25 @@ do_filter (const scanner_state_t states[], *outbuf = gmime_filter->outbuf; } +/* + * + * A uuencoded portion is identified as beginning with a line + * matching: + * + * begin [0-7][0-7][0-7] .* + * + * After that detection, and beginning with the following line, + * characters will be discarded as long as the first character of each + * line begins with M and subsequent characters on the line are within + * the range of ASCII characters from ' ' to '`'. + * + * This is not a perfect UUencode filter. It's possible to have a + * message that will legitimately match that pattern, (so that some + * legitimate content is discarded). And for most UUencoded files, the + * final line of encoded data (the line not starting with M) will be + * indexed. 
+ */ + static void filter_filter_uuencode (GMimeFilter *gmime_filter, char *inbuf, size_t inlen, size_t prespace, char **outbuf, size_t *outlen, size_t *outprespace) @@ -210,7 +214,7 @@ filter_complete (GMimeFilter *filter, char *inbuf, size_t inlen, size_t prespace static void filter_reset (GMimeFilter *gmime_filter) { - NotmuchFilterDiscardUuencode *filter = (NotmuchFilterDiscardUuencode *) gmime_filter; + NotmuchFilterDiscardNonTerms *filter = (NotmuchFilterDiscardNonTerms *) gmime_filter; filter->state = 0; } @@ -218,32 +222,32 @@ filter_reset (GMimeFilter *gmime_filter) /** * notmuch_filter_discard_uuencode_new: * - * Returns: a new #NotmuchFilterDiscardUuencode filter. + * Returns: a new #NotmuchFilterDiscardNonTerms filter. **/ static GMimeFilter * -notmuch_filter_discard_uuencode_new (GMimeContentType *content_type) +notmuch_filter_discard_non_terms_new (GMimeContentType *content_type) { static GType type = 0; - NotmuchFilterDiscardUuencode *filter; + NotmuchFilterDiscardNonTerms *filter; if (!type) { static const GTypeInfo info = { - sizeof (NotmuchFilterDiscardUuencodeClass), + sizeof (NotmuchFilterDiscardNonTermsClass), NULL, /* base_class_init */ NULL, /* base_class_finalize */ - (GClassInitFunc) notmuch_filter_discard_uuencode_class_init, + (GClassInitFunc) notmuch_filter_discard_non_terms_class_init, NULL, /* class_finalize */ NULL, /* class_data */ - sizeof (NotmuchFilterDiscardUuencode), + sizeof (NotmuchFilterDiscardNonTerms), 0, /* n_preallocs */ NULL, /* instance_init */ NULL /* value_table */ }; - type = g_type_register_static (GMIME_TYPE_FILTER, "NotmuchFilterDiscardUuencode", &info, (GTypeFlags) 0); + type = g_type_register_static (GMIME_TYPE_FILTER, "NotmuchFilterDiscardNonTerms", &info, (GTypeFlags) 0); } - filter = (NotmuchFilterDiscardUuencode *) g_object_newv (type, 0, NULL); + filter = (NotmuchFilterDiscardNonTerms *) g_object_newv (type, 0, NULL); filter->state = 0; filter->content_type = content_type; filter->real_filter = 
filter_filter_uuencode; @@ -332,7 +336,7 @@ _index_mime_part (notmuch_message_t *message, GMimeObject *part) { GMimeStream *stream, *filter; - GMimeFilter *discard_uuencode_filter; + GMimeFilter *discard_non_terms_filter; GMimeDataWrapper *wrapper; GByteArray *byte_array; GMimeContentDisposition *disposition; @@ -422,10 +426,10 @@ _index_mime_part (notmuch_message_t *message, g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); filter = g_mime_stream_filter_new (stream); - discard_uuencode_filter = notmuch_filter_discard_uuencode_new (content_type); + discard_non_terms_filter = notmuch_filter_discard_non_terms_new (content_type); g_mime_stream_filter_add (GMIME_STREAM_FILTER (filter), - discard_uuencode_filter); + discard_non_terms_filter); charset = g_mime_object_get_content_type_parameter (part, "charset"); if (charset) { @@ -447,7 +451,7 @@ _index_mime_part (notmuch_message_t *message, g_object_unref (stream); g_object_unref (filter); - g_object_unref (discard_uuencode_filter); + g_object_unref (discard_non_terms_filter); g_byte_array_append (byte_array, (guint8 *) "\0", 1); body = (char *) g_byte_array_free (byte_array, FALSE); -- 2.11.0