From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 391506DE0AE5 for ; Fri, 21 Jul 2017 03:59:12 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.001 X-Spam-Level: X-Spam-Status: No, score=-0.001 tagged_above=-999 required=5 tests=[AWL=0.010, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Ihn7ViCPoM10 for ; Fri, 21 Jul 2017 03:59:11 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 718F56DE00C4 for ; Fri, 21 Jul 2017 03:58:58 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.84_2) (envelope-from ) id 1dYVaq-0002za-Va; Fri, 21 Jul 2017 06:55:37 -0400 Received: (nullmailer pid 4900 invoked by uid 1000); Fri, 21 Jul 2017 10:58:47 -0000 From: David Bremner To: notmuch@notmuchmail.org, notmuch@freelists.org Subject: [Patch v4 03/12] lib: factor out message-id parsing to separate file. Date: Fri, 21 Jul 2017 07:58:31 -0300 Message-Id: <20170721105840.4737-4-david@tethera.net> X-Mailer: git-send-email 2.13.2 In-Reply-To: <20170721105840.4737-1-david@tethera.net> References: <20170721105840.4737-1-david@tethera.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 21 Jul 2017 10:59:12 -0000 This is really pure C string parsing, and doesn't need to be mixed in with the Xapian/C++ layer. Although not strictly necessary, it also makes it a bit more natural to call _parse_message_id from multiple compilation units. 
--- lib/Makefile.local | 1 + lib/add-message.cc | 108 +------------------------------------------------- lib/message-id.c | 96 ++++++++++++++++++++++++++++++++++++++++++++ lib/notmuch-private.h | 14 +++++++ 4 files changed, 113 insertions(+), 106 deletions(-) create mode 100644 lib/message-id.c diff --git a/lib/Makefile.local b/lib/Makefile.local index 9dd68286..0b5c4b08 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -38,6 +38,7 @@ libnotmuch_c_srcs = \ $(dir)/filenames.c \ $(dir)/string-list.c \ $(dir)/message-file.c \ + $(dir)/message-id.c \ $(dir)/messages.c \ $(dir)/sha1.c \ $(dir)/built-with.c \ diff --git a/lib/add-message.cc b/lib/add-message.cc index 0f09415e..f09094af 100644 --- a/lib/add-message.cc +++ b/lib/add-message.cc @@ -1,109 +1,5 @@ #include "database-private.h" -/* Advance 'str' past any whitespace or RFC 822 comments. A comment is - * a (potentially nested) parenthesized sequence with '\' used to - * escape any character (including parentheses). - * - * If the sequence to be skipped continues to the end of the string, - * then 'str' will be left pointing at the final terminating '\0' - * character. - */ -static void -skip_space_and_comments (const char **str) -{ - const char *s; - - s = *str; - while (*s && (isspace (*s) || *s == '(')) { - while (*s && isspace (*s)) - s++; - if (*s == '(') { - int nesting = 1; - s++; - while (*s && nesting) { - if (*s == '(') { - nesting++; - } else if (*s == ')') { - nesting--; - } else if (*s == '\\') { - if (*(s+1)) - s++; - } - s++; - } - } - } - - *str = s; -} - -/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 - * comments, and the '<' and '>' delimiters. - * - * If not NULL, then *next will be made to point to the first character - * not parsed, (possibly pointing to the final '\0' terminator. - * - * Returns a newly talloc'ed string belonging to 'ctx'. - * - * Returns NULL if there is any error parsing the message-id. 
*/ -static char * -_parse_message_id (void *ctx, const char *message_id, const char **next) -{ - const char *s, *end; - char *result; - - if (message_id == NULL || *message_id == '\0') - return NULL; - - s = message_id; - - skip_space_and_comments (&s); - - /* Skip any unstructured text as well. */ - while (*s && *s != '<') - s++; - - if (*s == '<') { - s++; - } else { - if (next) - *next = s; - return NULL; - } - - skip_space_and_comments (&s); - - end = s; - while (*end && *end != '>') - end++; - if (next) { - if (*end) - *next = end + 1; - else - *next = end; - } - - if (end > s && *end == '>') - end--; - if (end <= s) - return NULL; - - result = talloc_strndup (ctx, s, end - s + 1); - - /* Finally, collapse any whitespace that is within the message-id - * itself. */ - { - char *r; - int len; - - for (r = result, len = strlen (r); *r; r++, len--) - if (*r == ' ' || *r == '\t') - memmove (r, r+1, len); - } - - return result; -} - /* Parse a References header value, putting a (talloc'ed under 'ctx') * copy of each referenced message-id into 'hash'. * @@ -126,7 +22,7 @@ parse_references (void *ctx, return NULL; while (*refs) { - ref = _parse_message_id (ctx, refs, &refs); + ref = _notmuch_message_id_parse (ctx, refs, &refs); if (ref && strcmp (ref, message_id)) { g_hash_table_add (hash, ref); @@ -619,7 +515,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch, */ header = _notmuch_message_file_get_header (message_file, "message-id"); if (header && *header != '\0') { - message_id = _parse_message_id (message_file, header, NULL); + message_id = _notmuch_message_id_parse (message_file, header, NULL); /* So the header value isn't RFC-compliant, but it's * better than no message-id at all. diff --git a/lib/message-id.c b/lib/message-id.c new file mode 100644 index 00000000..d7541d50 --- /dev/null +++ b/lib/message-id.c @@ -0,0 +1,96 @@ +#include "notmuch-private.h" + +/* Advance 'str' past any whitespace or RFC 822 comments. 
A comment is + * a (potentially nested) parenthesized sequence with '\' used to + * escape any character (including parentheses). + * + * If the sequence to be skipped continues to the end of the string, + * then 'str' will be left pointing at the final terminating '\0' + * character. + */ +static void +skip_space_and_comments (const char **str) +{ + const char *s; + + s = *str; + while (*s && (isspace (*s) || *s == '(')) { + while (*s && isspace (*s)) + s++; + if (*s == '(') { + int nesting = 1; + s++; + while (*s && nesting) { + if (*s == '(') { + nesting++; + } else if (*s == ')') { + nesting--; + } else if (*s == '\\') { + if (*(s+1)) + s++; + } + s++; + } + } + } + + *str = s; +} + +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next) +{ + const char *s, *end; + char *result; + + if (message_id == NULL || *message_id == '\0') + return NULL; + + s = message_id; + + skip_space_and_comments (&s); + + /* Skip any unstructured text as well. */ + while (*s && *s != '<') + s++; + + if (*s == '<') { + s++; + } else { + if (next) + *next = s; + return NULL; + } + + skip_space_and_comments (&s); + + end = s; + while (*end && *end != '>') + end++; + if (next) { + if (*end) + *next = end + 1; + else + *next = end; + } + + if (end > s && *end == '>') + end--; + if (end <= s) + return NULL; + + result = talloc_strndup (ctx, s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. 
*/ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; +} diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 5dfebf5d..9957164d 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -492,6 +492,20 @@ notmuch_status_t _notmuch_query_count_documents (notmuch_query_t *query, const char *type, unsigned *count_out); +/* message-id.c */ + +/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 + * comments, and the '<' and '>' delimiters. + * + * If 'next' is not NULL, then *next will be made to point to the first + * character not parsed (possibly the final '\0' terminator). + * + * Returns a newly talloc'ed string belonging to 'ctx'. + * + * Returns NULL if there is any error parsing the message-id. */ +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next); + /* message.cc */ -- 2.13.2