unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org, notmuch@freelists.org
Subject: [Patch v4 03/12] lib: factor out message-id parsing to separate file.
Date: Fri, 21 Jul 2017 07:58:31 -0300	[thread overview]
Message-ID: <20170721105840.4737-4-david@tethera.net> (raw)
In-Reply-To: <20170721105840.4737-1-david@tethera.net>

This is really pure C string parsing, and doesn't need to be mixed in
with the Xapian/C++ layer. Although not strictly necessary, it also
makes it a bit more natural to call _parse_message_id from multiple
compilation units.
---
 lib/Makefile.local    |   1 +
 lib/add-message.cc    | 108 +-------------------------------------------------
 lib/message-id.c      |  96 ++++++++++++++++++++++++++++++++++++++++++++
 lib/notmuch-private.h |  14 +++++++
 4 files changed, 113 insertions(+), 106 deletions(-)
 create mode 100644 lib/message-id.c

diff --git a/lib/Makefile.local b/lib/Makefile.local
index 9dd68286..0b5c4b08 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -38,6 +38,7 @@ libnotmuch_c_srcs =		\
 	$(dir)/filenames.c	\
 	$(dir)/string-list.c	\
 	$(dir)/message-file.c	\
+	$(dir)/message-id.c	\
 	$(dir)/messages.c	\
 	$(dir)/sha1.c		\
 	$(dir)/built-with.c	\
diff --git a/lib/add-message.cc b/lib/add-message.cc
index 0f09415e..f09094af 100644
--- a/lib/add-message.cc
+++ b/lib/add-message.cc
@@ -1,109 +1,5 @@
 #include "database-private.h"
 
-/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
- * a (potentially nested) parenthesized sequence with '\' used to
- * escape any character (including parentheses).
- *
- * If the sequence to be skipped continues to the end of the string,
- * then 'str' will be left pointing at the final terminating '\0'
- * character.
- */
-static void
-skip_space_and_comments (const char **str)
-{
-    const char *s;
-
-    s = *str;
-    while (*s && (isspace (*s) || *s == '(')) {
-	while (*s && isspace (*s))
-	    s++;
-	if (*s == '(') {
-	    int nesting = 1;
-	    s++;
-	    while (*s && nesting) {
-		if (*s == '(') {
-		    nesting++;
-		} else if (*s == ')') {
-		    nesting--;
-		} else if (*s == '\\') {
-		    if (*(s+1))
-			s++;
-		}
-		s++;
-	    }
-	}
-    }
-
-    *str = s;
-}
-
-/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
- * comments, and the '<' and '>' delimiters.
- *
- * If not NULL, then *next will be made to point to the first character
- * not parsed, (possibly pointing to the final '\0' terminator.
- *
- * Returns a newly talloc'ed string belonging to 'ctx'.
- *
- * Returns NULL if there is any error parsing the message-id. */
-static char *
-_parse_message_id (void *ctx, const char *message_id, const char **next)
-{
-    const char *s, *end;
-    char *result;
-
-    if (message_id == NULL || *message_id == '\0')
-	return NULL;
-
-    s = message_id;
-
-    skip_space_and_comments (&s);
-
-    /* Skip any unstructured text as well. */
-    while (*s && *s != '<')
-	s++;
-
-    if (*s == '<') {
-	s++;
-    } else {
-	if (next)
-	    *next = s;
-	return NULL;
-    }
-
-    skip_space_and_comments (&s);
-
-    end = s;
-    while (*end && *end != '>')
-	end++;
-    if (next) {
-	if (*end)
-	    *next = end + 1;
-	else
-	    *next = end;
-    }
-
-    if (end > s && *end == '>')
-	end--;
-    if (end <= s)
-	return NULL;
-
-    result = talloc_strndup (ctx, s, end - s + 1);
-
-    /* Finally, collapse any whitespace that is within the message-id
-     * itself. */
-    {
-	char *r;
-	int len;
-
-	for (r = result, len = strlen (r); *r; r++, len--)
-	    if (*r == ' ' || *r == '\t')
-		memmove (r, r+1, len);
-    }
-
-    return result;
-}
-
 /* Parse a References header value, putting a (talloc'ed under 'ctx')
  * copy of each referenced message-id into 'hash'.
  *
@@ -126,7 +22,7 @@ parse_references (void *ctx,
 	return NULL;
 
     while (*refs) {
-	ref = _parse_message_id (ctx, refs, &refs);
+	ref = _notmuch_message_id_parse (ctx, refs, &refs);
 
 	if (ref && strcmp (ref, message_id)) {
 	    g_hash_table_add (hash, ref);
@@ -619,7 +515,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
      */
     header = _notmuch_message_file_get_header (message_file, "message-id");
     if (header && *header != '\0') {
-	message_id = _parse_message_id (message_file, header, NULL);
+	message_id = _notmuch_message_id_parse (message_file, header, NULL);
 
 	/* So the header value isn't RFC-compliant, but it's
 	 * better than no message-id at all.
diff --git a/lib/message-id.c b/lib/message-id.c
new file mode 100644
index 00000000..d7541d50
--- /dev/null
+++ b/lib/message-id.c
@@ -0,0 +1,96 @@
+#include "notmuch-private.h"
+
+/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
+ * a (potentially nested) parenthesized sequence with '\' used to
+ * escape any character (including parentheses).
+ *
+ * If the sequence to be skipped continues to the end of the string,
+ * then 'str' will be left pointing at the final terminating '\0'
+ * character.
+ */
+static void
+skip_space_and_comments (const char **str)
+{
+    const char *s;
+
+    s = *str;
+    while (*s && (isspace (*s) || *s == '(')) { /* each pass consumes a whitespace run and/or one comment */
+	while (*s && isspace (*s)) /* NOTE(review): plain char passed to isspace(); consider an (unsigned char) cast */
+	    s++;
+	if (*s == '(') {
+	    int nesting = 1; /* number of currently unclosed open parentheses */
+	    s++;
+	    while (*s && nesting) { /* ends at the matching close parenthesis or at end of string */
+		if (*s == '(') {
+		    nesting++;
+		} else if (*s == ')') {
+		    nesting--;
+		} else if (*s == '\\') {
+		    if (*(s+1)) /* do not step past the NUL terminator */
+			s++; /* skip the backslash; the s++ below skips the escaped character */
+		}
+		s++;
+	    }
+	}
+    }
+
+    *str = s; /* hand the advanced position back to the caller */
+}
+
+char *
+_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next)
+{
+    const char *s, *end;
+    char *result;
+
+    if (message_id == NULL || *message_id == '\0') /* note: *next is left untouched on this path */
+	return NULL;
+
+    s = message_id;
+
+    skip_space_and_comments (&s);
+
+    /* Skip any unstructured text as well. */
+    while (*s && *s != '<')
+	s++;
+
+    if (*s == '<') {
+	s++;
+    } else {
+	if (next)
+	    *next = s; /* no opening bracket found: s is at the NUL terminator here */
+	return NULL;
+    }
+
+    skip_space_and_comments (&s);
+
+    end = s;
+    while (*end && *end != '>')
+	end++;
+    if (next) {
+	if (*end)
+	    *next = end + 1; /* resume just after the closing bracket */
+	else
+	    *next = end; /* unterminated id: resume at the NUL terminator */
+    }
+
+    if (end > s && *end == '>')
+	end--; /* end now points at the last character before the closing bracket */
+    if (end <= s) /* NOTE(review): this also rejects one-character ids such as <a> -- confirm intended */
+	return NULL;
+
+    result = talloc_strndup (ctx, s, end - s + 1); /* NOTE(review): used below without a NULL check -- confirm it cannot fail here */
+
+    /* Finally, collapse any whitespace that is within the message-id
+     * itself. */
+    {
+	char *r;
+	int len;
+
+	for (r = result, len = strlen (r); *r; r++, len--)
+	    if (*r == ' ' || *r == '\t')
+		memmove (r, r+1, len); /* NOTE(review): the r++ that follows skips the shifted-in char, so the second of two adjacent blanks survives */
+    }
+
+    return result;
+}
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 5dfebf5d..9957164d 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -492,6 +492,20 @@ notmuch_status_t
 _notmuch_query_count_documents (notmuch_query_t *query,
 				const char *type,
 				unsigned *count_out);
+/* message-id.c */
+
+/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
+ * comments, and the '<' and '>' delimiters.
+ *
+ * If 'next' is not NULL, then *next will be made to point to the first
+ * character not parsed (possibly the final '\0' terminator).
+ *
+ * Returns a newly talloc'ed string belonging to 'ctx'.
+ *
+ * Returns NULL if there is any error parsing the message-id. */
+char *
+_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next);
+
 
 /* message.cc */
 
-- 
2.13.2

  parent reply	other threads:[~2017-07-21 10:59 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-21 10:58 v4 of index multiple files per msg-id, add reindex command David Bremner
2017-07-21 10:58 ` [Patch v4 01/12] lib: isolate n_d_add_message and helper functions into own file David Bremner
2017-07-21 10:58 ` [Patch v4 02/12] lib/n_d_add_message: refactor test for new/ghost messages David Bremner
2017-07-21 10:58 ` David Bremner [this message]
2017-07-21 10:58 ` [Patch v4 04/12] lib: refactor notmuch_database_add_message header parsing David Bremner
2017-07-21 10:58 ` [Patch v4 05/12] test: add known broken tests for duplicate message id David Bremner
2017-07-21 10:58 ` [Patch v4 06/12] lib: index message files with duplicate message-ids David Bremner
2017-07-21 10:58 ` [Patch v4 07/12] lib: add notmuch_message_count_files David Bremner
2017-07-21 10:58 ` [Patch v4 08/12] lib: add notmuch_thread_get_total_files David Bremner
2017-07-21 10:58 ` [Patch v4 09/12] cli/search: print total number of files matched in summary output David Bremner
2017-07-21 10:58 ` [Patch v4 10/12] lib: add _notmuch_message_remove_indexed_terms David Bremner
2017-07-21 10:58 ` [Patch v4 11/12] lib: add notmuch_message_reindex David Bremner
2017-07-21 10:58 ` [Patch v4 12/12] add "notmuch reindex" subcommand David Bremner
2017-08-02  2:53 ` v4 of index multiple files per msg-id, add reindex command David Bremner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170721105840.4737-4-david@tethera.net \
    --to=david@tethera.net \
    --cc=notmuch@freelists.org \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).