unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
From: David Bremner <david@tethera.net>
To: David Bremner <david@tethera.net>,
	Daniel Kahn Gillmor <dkg@fifthhorseman.net>,
	notmuch@notmuchmail.org
Subject: [PATCH] WIP: add all subjects to value.
Date: Tue, 19 Dec 2017 10:15:40 -0400	[thread overview]
Message-ID: <20171219141540.21421-2-david@tethera.net> (raw)
In-Reply-To: <20171219141540.21421-1-david@tethera.net>

---
 lib/add-message.cc         |  3 +--
 lib/message.cc             | 52 ++++++++++++++++++++++++++++++++++++++++------
 test/T670-duplicate-mid.sh |  1 -
 3 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/lib/add-message.cc b/lib/add-message.cc
index f5fac8be..095a1f37 100644
--- a/lib/add-message.cc
+++ b/lib/add-message.cc
@@ -538,8 +538,7 @@ notmuch_database_index_file (notmuch_database_t *notmuch,
 	if (ret)
 	    goto DONE;
 
-	if (is_new || is_ghost)
-	    _notmuch_message_set_header_values (message, date, from, subject);
+	_notmuch_message_set_header_values (message, date, from, subject);
 
 	if (!indexopts) {
 	    def_indexopts = notmuch_database_get_default_indexopts (notmuch);
diff --git a/lib/message.cc b/lib/message.cc
index d5db89b6..c624e145 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -26,6 +26,8 @@
 
 #include <gmime/gmime.h>
 
+#include <sstream>
+
 struct _notmuch_message {
     notmuch_database_t *notmuch;
     Xapian::docid doc_id;
@@ -514,8 +516,14 @@ notmuch_message_get_header (notmuch_message_t *message, const char *header)
 
     if (slot != Xapian::BAD_VALUENO) {
 	try {
-	    std::string value = message->doc.get_value (slot);
-
+	    std::string raw = message->doc.get_value (slot);
+	    std::string value;
+	    if (slot == NOTMUCH_VALUE_SUBJECT) {
+		std::istringstream f(raw);
+		std::getline(f, value);
+	    } else {
+		value = raw;
+	    }
 	    /* If we have NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES, then
 	     * empty values indicate empty headers.  If we don't, then
 	     * it could just mean we didn't record the header. */
@@ -655,6 +663,27 @@ _notmuch_message_remove_indexed_terms (notmuch_message_t *message)
     }
     return NOTMUCH_PRIVATE_STATUS_SUCCESS;
 }
+/* Clear every value slot stored on message's Xapian document and mark
+ * the message modified; the values are regenerated during indexing.
+ * Returns NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION if Xapian throws. */
+notmuch_private_status_t
+_notmuch_message_remove_values (notmuch_message_t *message)
+{
+    try {
+	message->doc.clear_values ();
+	message->modified = true;
+    } catch (const Xapian::Error &error) {
+	notmuch_database_t *notmuch = message->notmuch;
+	if (!notmuch->exception_reported) {
+	    _notmuch_database_log(_notmuch_message_database (message),
+				  "A Xapian exception occurred removing values: %s\n",
+				  error.get_msg().c_str());
+	    notmuch->exception_reported = true;
+	}
+	return NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION;
+    }
+    return NOTMUCH_PRIVATE_STATUS_SUCCESS;
+}
 
 /* Return true if p points at "new" or "cur". */
 static bool is_maildir (const char *p)
@@ -1097,6 +1126,7 @@ _notmuch_message_set_header_values (notmuch_message_t *message,
 				    const char *subject)
 {
     time_t time_value;
+    std::string old_subject;
 
     /* GMime really doesn't want to see a NULL date, so protect its
      * sensibilities. */
@@ -1114,7 +1144,13 @@ _notmuch_message_set_header_values (notmuch_message_t *message,
     message->doc.add_value (NOTMUCH_VALUE_TIMESTAMP,
 			    Xapian::sortable_serialise (time_value));
     message->doc.add_value (NOTMUCH_VALUE_FROM, from);
-    message->doc.add_value (NOTMUCH_VALUE_SUBJECT, subject);
+
+    old_subject = message->doc.get_value (NOTMUCH_VALUE_SUBJECT);
+    if (old_subject.empty())
+	message->doc.add_value (NOTMUCH_VALUE_SUBJECT, subject);
+    else
+	message->doc.add_value (NOTMUCH_VALUE_SUBJECT, old_subject + "\n" + subject);
+
     message->modified = true;
 }
 
@@ -1999,6 +2035,12 @@ notmuch_message_reindex (notmuch_message_t *message,
 	goto DONE;
     }
 
+    private_status = _notmuch_message_remove_values (message);
+    if (private_status) {
+	ret = COERCE_STATUS(private_status, "error values");
+	goto DONE;
+    }
+
     ret = notmuch_message_remove_all_properties_with_prefix (message, "index.");
     if (ret)
 	goto DONE; /* XXX TODO: distinguish from other error returns above? */
@@ -2043,9 +2085,7 @@ notmuch_message_reindex (notmuch_message_t *message,
 	    thread_id = orig_thread_id;
 
 	_notmuch_message_add_term (message, "thread", thread_id);
-	/* Take header values only from first filename */
-	if (found == 0)
-	    _notmuch_message_set_header_values (message, date, from, subject);
+	_notmuch_message_set_header_values (message, date, from, subject);
 
 	ret = _notmuch_message_index_file (message, indexopts, message_file);
 
diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh
index bf8cc3a8..cfc5dafb 100755
--- a/test/T670-duplicate-mid.sh
+++ b/test/T670-duplicate-mid.sh
@@ -48,7 +48,6 @@ notmuch search --output=files subject:'"message 2"' | notmuch_dir_sanitize > OUT
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest 'Regexp search for second subject'
-test_subtest_known_broken
 cat <<EOF >EXPECTED
 MAIL_DIR/copy0
 MAIL_DIR/copy1
-- 
2.15.1

  reply	other threads:[~2017-12-19 14:15 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-14 14:03 subjects and duplicated message id's David Bremner
2017-12-14 14:32 ` [PATCH] test: add known broken test for regexp search of second subject David Bremner
2018-05-03 10:52   ` David Bremner
2017-12-14 16:57 ` subjects and duplicated message id's Daniel Kahn Gillmor
2017-12-15  1:23   ` David Bremner
2017-12-19 14:15     ` WIP, all subjects in value slot David Bremner
2017-12-19 14:15       ` David Bremner [this message]
2018-05-04 13:48         ` [PATCH] WIP: add all subjects to value Daniel Kahn Gillmor
2018-05-07  0:54           ` David Bremner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://notmuchmail.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171219141540.21421-2-david@tethera.net \
    --to=david@tethera.net \
    --cc=dkg@fifthhorseman.net \
    --cc=notmuch@notmuchmail.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).