From: David Bremner <david@tethera.net>
To: notmuch@notmuchmail.org
Subject: [PATCH 03/10] lib: factor out message-id parsing to separate file.
Date: Fri, 14 Apr 2017 03:14:46 -0000 [thread overview]
Message-ID: <20170414025004.5334-4-david@tethera.net> (raw)
In-Reply-To: <20170414025004.5334-1-david@tethera.net>
This is really pure C string parsing, and doesn't need to be mixed in
with the Xapian/C++ layer. Although not strictly necessary, it also
makes it a bit more natural to call the parser (renamed here from
_parse_message_id to _notmuch_message_id_parse) from multiple
compilation units.
---
lib/Makefile.local | 1 +
lib/add-message.cc | 106 +-------------------------------------------------
lib/message-id.c | 96 +++++++++++++++++++++++++++++++++++++++++++++
lib/notmuch-private.h | 14 +++++++
4 files changed, 112 insertions(+), 105 deletions(-)
create mode 100644 lib/message-id.c
diff --git a/lib/Makefile.local b/lib/Makefile.local
index e29fb081..643199ad 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -36,6 +36,7 @@ libnotmuch_c_srcs = \
$(dir)/filenames.c \
$(dir)/string-list.c \
$(dir)/message-file.c \
+ $(dir)/message-id.c \
$(dir)/messages.c \
$(dir)/sha1.c \
$(dir)/built-with.c \
diff --git a/lib/add-message.cc b/lib/add-message.cc
index 0f09415e..314016a8 100644
--- a/lib/add-message.cc
+++ b/lib/add-message.cc
@@ -1,109 +1,5 @@
#include "database-private.h"
-/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
- * a (potentially nested) parenthesized sequence with '\' used to
- * escape any character (including parentheses).
- *
- * If the sequence to be skipped continues to the end of the string,
- * then 'str' will be left pointing at the final terminating '\0'
- * character.
- */
-static void
-skip_space_and_comments (const char **str)
-{
- const char *s;
-
- s = *str;
- while (*s && (isspace (*s) || *s == '(')) {
- while (*s && isspace (*s))
- s++;
- if (*s == '(') {
- int nesting = 1;
- s++;
- while (*s && nesting) {
- if (*s == '(') {
- nesting++;
- } else if (*s == ')') {
- nesting--;
- } else if (*s == '\\') {
- if (*(s+1))
- s++;
- }
- s++;
- }
- }
- }
-
- *str = s;
-}
-
-/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
- * comments, and the '<' and '>' delimiters.
- *
- * If not NULL, then *next will be made to point to the first character
- * not parsed, (possibly pointing to the final '\0' terminator.
- *
- * Returns a newly talloc'ed string belonging to 'ctx'.
- *
- * Returns NULL if there is any error parsing the message-id. */
-static char *
-_parse_message_id (void *ctx, const char *message_id, const char **next)
-{
- const char *s, *end;
- char *result;
-
- if (message_id == NULL || *message_id == '\0')
- return NULL;
-
- s = message_id;
-
- skip_space_and_comments (&s);
-
- /* Skip any unstructured text as well. */
- while (*s && *s != '<')
- s++;
-
- if (*s == '<') {
- s++;
- } else {
- if (next)
- *next = s;
- return NULL;
- }
-
- skip_space_and_comments (&s);
-
- end = s;
- while (*end && *end != '>')
- end++;
- if (next) {
- if (*end)
- *next = end + 1;
- else
- *next = end;
- }
-
- if (end > s && *end == '>')
- end--;
- if (end <= s)
- return NULL;
-
- result = talloc_strndup (ctx, s, end - s + 1);
-
- /* Finally, collapse any whitespace that is within the message-id
- * itself. */
- {
- char *r;
- int len;
-
- for (r = result, len = strlen (r); *r; r++, len--)
- if (*r == ' ' || *r == '\t')
- memmove (r, r+1, len);
- }
-
- return result;
-}
-
/* Parse a References header value, putting a (talloc'ed under 'ctx')
* copy of each referenced message-id into 'hash'.
*
@@ -126,7 +22,7 @@ parse_references (void *ctx,
return NULL;
while (*refs) {
- ref = _parse_message_id (ctx, refs, &refs);
+ ref = _notmuch_message_id_parse (ctx, refs, &refs);
if (ref && strcmp (ref, message_id)) {
g_hash_table_add (hash, ref);
diff --git a/lib/message-id.c b/lib/message-id.c
new file mode 100644
index 00000000..d7541d50
--- /dev/null
+++ b/lib/message-id.c
@@ -0,0 +1,96 @@
+#include "notmuch-private.h"
+
+/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
+ * a (potentially nested) parenthesized sequence with '\' used to
+ * escape any character (including parentheses).
+ *
+ * If the sequence to be skipped continues to the end of the string,
+ * then 'str' will be left pointing at the final terminating '\0'
+ * character.
+ *
+ * 'str' is an in/out parameter: scanning starts at '*str', and on
+ * return '*str' points at the first character that is neither
+ * whitespace (as reported by isspace) nor the '(' opening a comment.
+ * Runs of whitespace and comments may alternate any number of times.
+ *
+ * Inside a comment, 'nesting' counts the currently open parentheses;
+ * the comment ends when it drops back to zero. A '\' causes the
+ * following character to be skipped as well, unless that character
+ * is the terminating '\0', so an escaped '(' or ')' never changes
+ * the nesting depth. An unterminated comment consumes the rest of
+ * the string.
+ *
+ * NOTE(review): '*s' is passed to isspace as a plain char; on
+ * platforms where char is signed, bytes above 0x7f would be negative
+ * there -- confirm whether a cast to unsigned char is wanted.
+ */
+static void
+skip_space_and_comments (const char **str)
+{
+ const char *s;
+
+ s = *str;
+ while (*s && (isspace (*s) || *s == '(')) {
+ while (*s && isspace (*s))
+ s++;
+ if (*s == '(') {
+ int nesting = 1;
+ s++;
+ while (*s && nesting) {
+ if (*s == '(') {
+ nesting++;
+ } else if (*s == ')') {
+ nesting--;
+ } else if (*s == '\\') {
+ if (*(s+1))
+ s++;
+ }
+ s++;
+ }
+ }
+ }
+
+ *str = s;
+}
+
+char *
+_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next)
+{
+ const char *s, *end;
+ char *result;
+
+ if (message_id == NULL || *message_id == '\0')
+ return NULL;
+
+ s = message_id;
+
+ skip_space_and_comments (&s);
+
+ /* Skip any unstructured text as well: everything before the first
+ * '<' is ignored. If no '<' is found there is no message-id to
+ * return, and '*next' (when requested) is left pointing at the
+ * terminating '\0'. */
+ while (*s && *s != '<')
+ s++;
+
+ if (*s == '<') {
+ s++;
+ } else {
+ if (next)
+ *next = s;
+ return NULL;
+ }
+
+ skip_space_and_comments (&s);
+
+ end = s;
+ while (*end && *end != '>')
+ end++;
+ if (next) {
+ if (*end)
+ *next = end + 1;
+ else
+ *next = end;
+ }
+
+ if (end > s && *end == '>')
+ end--;
+ if (end <= s)
+ return NULL;
+
+ result = talloc_strndup (ctx, s, end - s + 1);
+
+ /* Finally, collapse any whitespace that is within the message-id
+ * itself. Only ' ' and '\t' are removed here. 'len' counts down in
+ * step with 'r', so each memmove also carries the terminating '\0'
+ * along with the tail of the string.
+ *
+ * NOTE(review): after a removal 'r' still advances, so the character
+ * shifted into the vacated slot is never examined; the second of two
+ * adjacent blanks appears to survive -- confirm whether intended.
+ * NOTE(review): 'result' is used without a NULL check; presumably
+ * talloc_strndup can fail -- verify.
+ * NOTE(review): the 'end <= s' test above also rejects an id that
+ * is a single character long (e.g. "<a>") -- confirm whether that
+ * is intended. */
+ {
+ char *r;
+ int len;
+
+ for (r = result, len = strlen (r); *r; r++, len--)
+ if (*r == ' ' || *r == '\t')
+ memmove (r, r+1, len);
+ }
+
+ return result;
+}
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 8587e86c..a1ae4bd5 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -500,6 +500,20 @@ notmuch_status_t
_notmuch_query_count_documents (notmuch_query_t *query,
const char *type,
unsigned *count_out);
+/* message-id.c */
+
+/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822
+ * comments, and the '<' and '>' delimiters.
+ *
+ * If not NULL, then *next will be made to point to the first character
+ * not parsed (possibly pointing to the final '\0' terminator).
+ *
+ * Returns a newly talloc'ed string belonging to 'ctx'.
+ *
+ * Returns NULL if there is any error parsing the message-id. */
+char *
+_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next);
+
/* message.cc */
--
2.11.0
next prev parent reply other threads:[~2017-04-14 3:14 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-14 3:14 index multiple files per message-id, add reindex command David Bremner
2017-04-14 3:14 ` [PATCH 05/10] test: add known broken tests for duplicate message id David Bremner
2017-04-14 3:14 ` [PATCH 08/10] lib: add _notmuch_message_remove_indexed_terms David Bremner
2017-04-14 3:14 ` [PATCH 10/10] add "notmuch reindex" subcommand David Bremner
2017-04-14 3:14 ` [PATCH 04/10] lib: refactor notmuch_database_add_message header parsing David Bremner
2017-04-14 3:14 ` [PATCH 07/10] WIP: Add message count to summary output David Bremner
2017-04-14 3:14 ` [PATCH 01/10] lib: isolate n_d_add_message and helper functions into own file David Bremner
2017-04-14 3:14 ` [PATCH 09/10] lib: add notmuch_message_reindex David Bremner
2017-04-14 3:14 ` [PATCH 06/10] lib: index message files with duplicate message-ids David Bremner
2017-04-14 3:14 ` David Bremner [this message]
2017-04-14 3:14 ` [PATCH 02/10] lib/n_d_add_message: refactor test for new/ghost messages David Bremner
-- strict thread matches above, loose matches on Subject: below --
2017-04-19 1:23 v1.1 index multiple files per message-id, add reindex command David Bremner
2017-04-19 1:23 ` [PATCH 03/10] lib: factor out message-id parsing to separate file David Bremner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://notmuchmail.org/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170414025004.5334-4-david@tethera.net \
--to=david@tethera.net \
--cc=notmuch@notmuchmail.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://yhetil.org/notmuch.git/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).