From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 2BADF6DE0AF5 for ; Sun, 4 Jun 2017 05:32:56 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.001 X-Spam-Level: X-Spam-Status: No, score=-0.001 tagged_above=-999 required=5 tests=[AWL=0.010, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id OEFRnkvU-97c for ; Sun, 4 Jun 2017 05:32:55 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 267456DE0BA5 for ; Sun, 4 Jun 2017 05:32:47 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.84_2) (envelope-from ) id 1dHUhA-0004Yv-8a; Sun, 04 Jun 2017 08:31:48 -0400 Received: (nullmailer pid 24527 invoked by uid 1000); Sun, 04 Jun 2017 12:32:38 -0000 From: David Bremner To: notmuch@freelists.org, notmuch@notmuchmail.org Subject: [patch v3 03/12] lib: factor out message-id parsing to separate file. Date: Sun, 4 Jun 2017 09:32:26 -0300 Message-Id: <20170604123235.24466-4-david@tethera.net> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20170604123235.24466-1-david@tethera.net> References: <20170604123235.24466-1-david@tethera.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 04 Jun 2017 12:32:56 -0000 This is really pure C string parsing, and doesn't need to be mixed in with the Xapian/C++ layer. Although not strictly necessary, it also makes it a bit more natural to call _parse_message_id from multiple compilation units. --- lib/Makefile.local | 1 + lib/add-message.cc | 106 +------------------------------------------------- lib/message-id.c | 96 +++++++++++++++++++++++++++++++++++++++++++++ lib/notmuch-private.h | 14 +++++++ 4 files changed, 112 insertions(+), 105 deletions(-) create mode 100644 lib/message-id.c diff --git a/lib/Makefile.local b/lib/Makefile.local index 9dd68286..0b5c4b08 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -38,6 +38,7 @@ libnotmuch_c_srcs = \ $(dir)/filenames.c \ $(dir)/string-list.c \ $(dir)/message-file.c \ + $(dir)/message-id.c \ $(dir)/messages.c \ $(dir)/sha1.c \ $(dir)/built-with.c \ diff --git a/lib/add-message.cc b/lib/add-message.cc index 0f09415e..314016a8 100644 --- a/lib/add-message.cc +++ b/lib/add-message.cc @@ -1,109 +1,5 @@ #include "database-private.h" -/* Advance 'str' past any whitespace or RFC 822 comments. A comment is - * a (potentially nested) parenthesized sequence with '\' used to - * escape any character (including parentheses). - * - * If the sequence to be skipped continues to the end of the string, - * then 'str' will be left pointing at the final terminating '\0' - * character. - */ -static void -skip_space_and_comments (const char **str) -{ - const char *s; - - s = *str; - while (*s && (isspace (*s) || *s == '(')) { - while (*s && isspace (*s)) - s++; - if (*s == '(') { - int nesting = 1; - s++; - while (*s && nesting) { - if (*s == '(') { - nesting++; - } else if (*s == ')') { - nesting--; - } else if (*s == '\\') { - if (*(s+1)) - s++; - } - s++; - } - } - } - - *str = s; -} - -/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 - * comments, and the '<' and '>' delimiters. - * - * If not NULL, then *next will be made to point to the first character - * not parsed, (possibly pointing to the final '\0' terminator. - * - * Returns a newly talloc'ed string belonging to 'ctx'. - * - * Returns NULL if there is any error parsing the message-id. */ -static char * -_parse_message_id (void *ctx, const char *message_id, const char **next) -{ - const char *s, *end; - char *result; - - if (message_id == NULL || *message_id == '\0') - return NULL; - - s = message_id; - - skip_space_and_comments (&s); - - /* Skip any unstructured text as well. */ - while (*s && *s != '<') - s++; - - if (*s == '<') { - s++; - } else { - if (next) - *next = s; - return NULL; - } - - skip_space_and_comments (&s); - - end = s; - while (*end && *end != '>') - end++; - if (next) { - if (*end) - *next = end + 1; - else - *next = end; - } - - if (end > s && *end == '>') - end--; - if (end <= s) - return NULL; - - result = talloc_strndup (ctx, s, end - s + 1); - - /* Finally, collapse any whitespace that is within the message-id - * itself. */ - { - char *r; - int len; - - for (r = result, len = strlen (r); *r; r++, len--) - if (*r == ' ' || *r == '\t') - memmove (r, r+1, len); - } - - return result; -} - /* Parse a References header value, putting a (talloc'ed under 'ctx') * copy of each referenced message-id into 'hash'. * @@ -126,7 +22,7 @@ parse_references (void *ctx, return NULL; while (*refs) { - ref = _parse_message_id (ctx, refs, &refs); + ref = _notmuch_message_id_parse (ctx, refs, &refs); if (ref && strcmp (ref, message_id)) { g_hash_table_add (hash, ref); diff --git a/lib/message-id.c b/lib/message-id.c new file mode 100644 index 00000000..d7541d50 --- /dev/null +++ b/lib/message-id.c @@ -0,0 +1,96 @@ +#include "notmuch-private.h" + +/* Advance 'str' past any whitespace or RFC 822 comments. A comment is + * a (potentially nested) parenthesized sequence with '\' used to + * escape any character (including parentheses). + * + * If the sequence to be skipped continues to the end of the string, + * then 'str' will be left pointing at the final terminating '\0' + * character. + */ +static void +skip_space_and_comments (const char **str) +{ + const char *s; + + s = *str; + while (*s && (isspace (*s) || *s == '(')) { + while (*s && isspace (*s)) + s++; + if (*s == '(') { + int nesting = 1; + s++; + while (*s && nesting) { + if (*s == '(') { + nesting++; + } else if (*s == ')') { + nesting--; + } else if (*s == '\\') { + if (*(s+1)) + s++; + } + s++; + } + } + } + + *str = s; +} + +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next) +{ + const char *s, *end; + char *result; + + if (message_id == NULL || *message_id == '\0') + return NULL; + + s = message_id; + + skip_space_and_comments (&s); + + /* Skip any unstructured text as well. */ + while (*s && *s != '<') + s++; + + if (*s == '<') { + s++; + } else { + if (next) + *next = s; + return NULL; + } + + skip_space_and_comments (&s); + + end = s; + while (*end && *end != '>') + end++; + if (next) { + if (*end) + *next = end + 1; + else + *next = end; + } + + if (end > s && *end == '>') + end--; + if (end <= s) + return NULL; + + result = talloc_strndup (ctx, s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. */ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; +} diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index ac315e4c..dda94d76 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -492,6 +492,20 @@ notmuch_status_t _notmuch_query_count_documents (notmuch_query_t *query, const char *type, unsigned *count_out); +/* message-id.c */ + +/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 + * comments, and the '<' and '>' delimiters. + * + * If not NULL, then *next will be made to point to the first character + * not parsed, (possibly pointing to the final '\0' terminator. + * + * Returns a newly talloc'ed string belonging to 'ctx'. + * + * Returns NULL if there is any error parsing the message-id. */ +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next); + /* message.cc */ -- 2.11.0