From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id 45C376DE11B5 for ; Tue, 19 Dec 2017 06:15:50 -0800 (PST) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: 0 X-Spam-Level: X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[AWL=0.011, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 3wvGZzl8AsRF for ; Tue, 19 Dec 2017 06:15:49 -0800 (PST) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id 49E1F6DE11A3 for ; Tue, 19 Dec 2017 06:15:49 -0800 (PST) Received: from remotemail by fethera.tethera.net with local (Exim 4.89) (envelope-from ) id 1eRIgO-00036c-Ga; Tue, 19 Dec 2017 09:15:48 -0500 Received: (nullmailer pid 21486 invoked by uid 1000); Tue, 19 Dec 2017 14:15:47 -0000 From: David Bremner To: David Bremner , Daniel Kahn Gillmor , notmuch@notmuchmail.org Subject: [PATCH] WIP: add all subjects to value. Date: Tue, 19 Dec 2017 10:15:40 -0400 Message-Id: <20171219141540.21421-2-david@tethera.net> X-Mailer: git-send-email 2.15.1 In-Reply-To: <20171219141540.21421-1-david@tethera.net> References: <87h8ss2390.fsf@tethera.net> <20171219141540.21421-1-david@tethera.net> X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 19 Dec 2017 14:15:50 -0000 --- lib/add-message.cc | 3 +-- lib/message.cc | 52 ++++++++++++++++++++++++++++++++++++++++------ test/T670-duplicate-mid.sh | 1 - 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/lib/add-message.cc b/lib/add-message.cc index f5fac8be..095a1f37 100644 --- a/lib/add-message.cc +++ b/lib/add-message.cc @@ -538,8 +538,7 @@ notmuch_database_index_file (notmuch_database_t *notmuch, if (ret) goto DONE; - if (is_new || is_ghost) - _notmuch_message_set_header_values (message, date, from, subject); + _notmuch_message_set_header_values (message, date, from, subject); if (!indexopts) { def_indexopts = notmuch_database_get_default_indexopts (notmuch); diff --git a/lib/message.cc b/lib/message.cc index d5db89b6..c624e145 100644 --- a/lib/message.cc +++ b/lib/message.cc @@ -26,6 +26,8 @@ #include +#include + struct _notmuch_message { notmuch_database_t *notmuch; Xapian::docid doc_id; @@ -514,8 +516,14 @@ notmuch_message_get_header (notmuch_message_t *message, const char *header) if (slot != Xapian::BAD_VALUENO) { try { - std::string value = message->doc.get_value (slot); - + std::string raw = message->doc.get_value (slot); + std::string value; + if (slot == NOTMUCH_VALUE_SUBJECT) { + std::istringstream f(raw); + std::getline(f, value); + } else { + value = raw; + } /* If we have NOTMUCH_FEATURE_FROM_SUBJECT_ID_VALUES, then * empty values indicate empty headers. If we don't, then * it could just mean we didn't record the header. */ @@ -655,6 +663,27 @@ _notmuch_message_remove_indexed_terms (notmuch_message_t *message) } return NOTMUCH_PRIVATE_STATUS_SUCCESS; } +/* Remove all values from a document; currently these are + all regenerated during indexing */ + +notmuch_private_status_t +_notmuch_message_remove_values (notmuch_message_t *message) +{ + try { + message->doc.clear_values (); + message->modified = TRUE; + } catch (const Xapian::Error &error) { + notmuch_database_t *notmuch = message->notmuch; + + if (!notmuch->exception_reported) { + _notmuch_database_log(_notmuch_message_database (message), "A Xapian exception occurred creating message: %s\n", + error.get_msg().c_str()); + notmuch->exception_reported = TRUE; + } + return NOTMUCH_PRIVATE_STATUS_XAPIAN_EXCEPTION; + } + return NOTMUCH_PRIVATE_STATUS_SUCCESS; +} /* Return true if p points at "new" or "cur". */ static bool is_maildir (const char *p) @@ -1097,6 +1126,7 @@ _notmuch_message_set_header_values (notmuch_message_t *message, const char *subject) { time_t time_value; + std::string old_subject; /* GMime really doesn't want to see a NULL date, so protect its * sensibilities. */ @@ -1114,7 +1144,13 @@ _notmuch_message_set_header_values (notmuch_message_t *message, message->doc.add_value (NOTMUCH_VALUE_TIMESTAMP, Xapian::sortable_serialise (time_value)); message->doc.add_value (NOTMUCH_VALUE_FROM, from); - message->doc.add_value (NOTMUCH_VALUE_SUBJECT, subject); + + old_subject = message->doc.get_value (NOTMUCH_VALUE_SUBJECT); + if (old_subject.empty()) + message->doc.add_value (NOTMUCH_VALUE_SUBJECT, subject); + else + message->doc.add_value (NOTMUCH_VALUE_SUBJECT, old_subject + "\n" + subject); + message->modified = true; } @@ -1999,6 +2035,12 @@ notmuch_message_reindex (notmuch_message_t *message, goto DONE; } + private_status = _notmuch_message_remove_values (message); + if (private_status) { + ret = COERCE_STATUS(private_status, "error values"); + goto DONE; + } + ret = notmuch_message_remove_all_properties_with_prefix (message, "index."); if (ret) goto DONE; /* XXX TODO: distinguish from other error returns above? */ @@ -2043,9 +2085,7 @@ notmuch_message_reindex (notmuch_message_t *message, thread_id = orig_thread_id; _notmuch_message_add_term (message, "thread", thread_id); - /* Take header values only from first filename */ - if (found == 0) - _notmuch_message_set_header_values (message, date, from, subject); + _notmuch_message_set_header_values (message, date, from, subject); ret = _notmuch_message_index_file (message, indexopts, message_file); diff --git a/test/T670-duplicate-mid.sh b/test/T670-duplicate-mid.sh index bf8cc3a8..cfc5dafb 100755 --- a/test/T670-duplicate-mid.sh +++ b/test/T670-duplicate-mid.sh @@ -48,7 +48,6 @@ notmuch search --output=files subject:'"message 2"' | notmuch_dir_sanitize > OUT test_expect_equal_file EXPECTED OUTPUT test_begin_subtest 'Regexp search for second subject' -test_subtest_known_broken cat <EXPECTED MAIL_DIR/copy0 MAIL_DIR/copy1 -- 2.15.1