From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by arlo.cworth.org (Postfix) with ESMTP id C682C6DE0130 for ; Mon, 6 Jun 2016 19:06:20 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at cworth.org X-Spam-Flag: NO X-Spam-Score: -0.011 X-Spam-Level: X-Spam-Status: No, score=-0.011 tagged_above=-999 required=5 tests=[AWL=-0.000, SPF_PASS=-0.001, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from arlo.cworth.org ([127.0.0.1]) by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id rI7TzeRsZYMf for ; Mon, 6 Jun 2016 19:06:10 -0700 (PDT) Received: from fethera.tethera.net (fethera.tethera.net [198.245.60.197]) by arlo.cworth.org (Postfix) with ESMTPS id C2C196DE00DA for ; Mon, 6 Jun 2016 19:06:10 -0700 (PDT) Received: from remotemail by fethera.tethera.net with local (Exim 4.84) (envelope-from ) id 1bA6Om-0000AV-CX; Mon, 06 Jun 2016 22:05:44 -0400 Received: (nullmailer pid 7230 invoked by uid 1000); Tue, 07 Jun 2016 02:05:54 -0000 From: David Bremner To: Austin Clements , David Bremner Cc: sfischme@uwaterloo.ca, Gaute Hope , notmuch Subject: [PATCH] WIP: regexp matching in subjects Date: Mon, 6 Jun 2016 23:05:49 -0300 Message-Id: <1465265149-7174-1-git-send-email-david@tethera.net> X-Mailer: git-send-email 2.8.1 In-Reply-To: References: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 07 Jun 2016 02:06:20 -0000 the idea is that you can run % notmuch search 'subject:rx:' or % notmuch search subject:"your usual phrase search" This should also work with bindings. --- Here is Austin's "hack", crammed into the field processor framework. 
I seem to have broken one of the existing subject search tests with my recursive query parsing. I didn't have time to figure out why, yet. lib/Makefile.local | 2 ++ lib/database-private.h | 1 + lib/database.cc | 5 +++ lib/regexp-ps.cc | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/regexp-ps.h | 37 ++++++++++++++++++++ lib/subject-fp.cc | 41 ++++++++++++++++++++++ lib/subject-fp.h | 43 +++++++++++++++++++++++ 7 files changed, 221 insertions(+) create mode 100644 lib/regexp-ps.cc create mode 100644 lib/regexp-ps.h create mode 100644 lib/subject-fp.cc create mode 100644 lib/subject-fp.h diff --git a/lib/Makefile.local b/lib/Makefile.local index beb9635..0e7311f 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -51,6 +51,8 @@ libnotmuch_cxx_srcs = \ $(dir)/query.cc \ $(dir)/query-fp.cc \ $(dir)/config.cc \ + $(dir)/regexp-ps.cc \ + $(dir)/subject-fp.cc \ $(dir)/thread.cc libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o) diff --git a/lib/database-private.h b/lib/database-private.h index ca71a92..5de0b81 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -186,6 +186,7 @@ struct _notmuch_database { #if HAVE_XAPIAN_FIELD_PROCESSOR Xapian::FieldProcessor *date_field_processor; Xapian::FieldProcessor *query_field_processor; + Xapian::FieldProcessor *subject_field_processor; #endif Xapian::ValueRangeProcessor *last_mod_range_processor; }; diff --git a/lib/database.cc b/lib/database.cc index 86bf261..adfbb81 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -21,6 +21,7 @@ #include "database-private.h" #include "parse-time-vrp.h" #include "query-fp.h" +#include "subject-fp.h" #include "string-util.h" #include @@ -1008,6 +1009,8 @@ notmuch_database_open_verbose (const char *path, notmuch->query_parser->add_boolean_prefix("date", notmuch->date_field_processor); notmuch->query_field_processor = new QueryFieldProcessor (*notmuch->query_parser, notmuch); notmuch->query_parser->add_boolean_prefix("query", 
notmuch->query_field_processor); + notmuch->subject_field_processor = new SubjectFieldProcessor (*notmuch->query_parser, notmuch); + notmuch->query_parser->add_boolean_prefix("subject", notmuch->subject_field_processor); #endif notmuch->last_mod_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:"); @@ -1027,6 +1030,8 @@ notmuch_database_open_verbose (const char *path, for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) { prefix_t *prefix = &PROBABILISTIC_PREFIX[i]; + if (strcmp (prefix->name, "subject") == 0) + continue; notmuch->query_parser->add_prefix (prefix->name, prefix->prefix); } } catch (const Xapian::Error &error) { diff --git a/lib/regexp-ps.cc b/lib/regexp-ps.cc new file mode 100644 index 0000000..540c7d6 --- /dev/null +++ b/lib/regexp-ps.cc @@ -0,0 +1,92 @@ +/* regexp-ps.cc - posting source for regexp matching on values + * + * This file is part of notmuch. + * + * Copyright © 2016 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/ .
+ * + * Author: Austin Clements + * David Bremner + */ + +#include "regexp-ps.h" +/* Compile regexp (POSIX extended, REG_NOSUB); throws if regcomp fails. */ +RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp) + : slot_ (slot) +{ + int r = regcomp (&regexp_, regexp.c_str (), REG_EXTENDED | REG_NOSUB); + + if (r != 0) + /* XXX Report a query syntax error using regerror */ + throw "regcomp failed"; +} +/* Release the compiled pattern. */ +RegexpPostingSource::~RegexpPostingSource () +{ + regfree (&regexp_); +} +/* Bind to db and rewind to the start of the value stream for slot_. */ +void +RegexpPostingSource::init (const Xapian::Database &db) +{ + db_ = db; + it_ = db_.valuestream_begin (slot_); + end_ = db_.valuestream_end (slot_); + started_ = false; +} +/* Lower bound on matches: possibly no value matches. */ +Xapian::doccount +RegexpPostingSource::get_termfreq_min () const +{ + return 0; +} +/* Estimated number of matches: half of the upper bound. */ +Xapian::doccount +RegexpPostingSource::get_termfreq_est () const +{ + return get_termfreq_max () / 2; +} +/* Upper bound on matches: db_.get_value_freq (slot_). */ +Xapian::doccount +RegexpPostingSource::get_termfreq_max () const +{ + return db_.get_value_freq (slot_); +} +/* Document id at the current iterator position. */ +Xapian::docid +RegexpPostingSource::get_docid () const +{ + return it_.get_docid (); +} +/* True once the value stream is exhausted. */ +bool +RegexpPostingSource::at_end () const +{ + return it_ == end_; +} +/* Advance to the next document whose value in slot_ matches the regexp. */ +void +RegexpPostingSource::next (unused (double min_wt)) +{ + if (started_ && ! at_end ()) + ++it_; + started_ = true; + /* Skip forward over values that do not match. */ + for (; ! at_end (); ++it_) { + std::string value = *it_; + if (regexec (&regexp_, value.c_str (), 0, NULL, 0) == 0) + break; + } +} diff --git a/lib/regexp-ps.h b/lib/regexp-ps.h new file mode 100644 index 0000000..a4553a7 --- /dev/null +++ b/lib/regexp-ps.h @@ -0,0 +1,37 @@ +#ifndef NOTMUCH_REGEX_PS_H +#define NOTMUCH_REGEX_PS_H + +#include +#include +#include +#include "notmuch-private.h" + +/* A posting source that returns documents where a value matches a + * regexp.
+ */ +class RegexpPostingSource : public Xapian::PostingSource +{ +protected: +const Xapian::valueno slot_; /* value slot whose contents are matched */ +regex_t regexp_; /* compiled by the constructor, freed by the destructor */ +Xapian::Database db_; /* database handle saved by init() */ +bool started_; /* false from init() until the first next() */ +Xapian::ValueIterator it_, end_; /* current position / end of slot_'s value stream */ + +/* No copying */ +RegexpPostingSource (const RegexpPostingSource &); +RegexpPostingSource &operator= (const RegexpPostingSource &); + +public: + RegexpPostingSource (Xapian::valueno slot, const std::string &regexp); +~RegexpPostingSource (); +void init (const Xapian::Database &db); +Xapian::doccount get_termfreq_min () const; +Xapian::doccount get_termfreq_est () const; +Xapian::doccount get_termfreq_max () const; +Xapian::docid get_docid () const; +bool at_end () const; +void next (unused (double min_wt)); +}; + +#endif diff --git a/lib/subject-fp.cc b/lib/subject-fp.cc new file mode 100644 index 0000000..1627721 --- /dev/null +++ b/lib/subject-fp.cc @@ -0,0 +1,41 @@ +/* subject-fp.cc - "subject:" field processor glue + * + * This file is part of notmuch. + * + * Copyright © 2016 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/ .
+ * + * Author: David Bremner + */ + +#include "database-private.h" +#include "subject-fp.h" +#include + +#if HAVE_XAPIAN_FIELD_PROCESSOR +/* Build the query for subject:STR; a leading "rx:" selects regexp matching. */ +Xapian::Query +SubjectFieldProcessor::operator() (const std::string & str) +{ + std::string prefix = "rx:"; + /* Without "rx:", STR is re-parsed with the subject term prefix as default. */ + if (str.compare(0,prefix.size(),prefix)==0) { + RegexpPostingSource *source = new RegexpPostingSource(NOTMUCH_VALUE_SUBJECT, str.substr(prefix.size())); + return Xapian::Query(source->release ()); /* release(): Xapian now owns and frees it */ + } else { + return parser.parse_query (str, NOTMUCH_QUERY_PARSER_FLAGS, _find_prefix ("subject")); + } +} +#endif diff --git a/lib/subject-fp.h b/lib/subject-fp.h new file mode 100644 index 0000000..ca622ba --- /dev/null +++ b/lib/subject-fp.h @@ -0,0 +1,43 @@ +/* subject-fp.h - subject field processor glue + * + * This file is part of notmuch. + * + * Copyright © 2016 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see https://www.gnu.org/licenses/ .
+ * + * Author: David Bremner + */ + +#ifndef NOTMUCH_SUBJECT_FP_H +#define NOTMUCH_SUBJECT_FP_H + +#include +#include "notmuch.h" +#include "regexp-ps.h" +/* Field processor that database.cc registers for the "subject" prefix. */ +#if HAVE_XAPIAN_FIELD_PROCESSOR +class SubjectFieldProcessor : public Xapian::FieldProcessor { + protected: + Xapian::QueryParser &parser; /* parser used to re-parse non-regexp values */ + notmuch_database_t *notmuch; /* database handle passed to the constructor */ + RegexpPostingSource *postings = NULL; /* not deleted by this class: no destructor is declared */ + public: + SubjectFieldProcessor (Xapian::QueryParser &parser_, notmuch_database_t *notmuch_) + : parser(parser_), notmuch(notmuch_) { }; + /* Build the query for one subject:STR occurrence (defined in subject-fp.cc). */ + Xapian::Query operator()(const std::string & str); +}; +#endif +#endif /* NOTMUCH_SUBJECT_FP_H */ -- 2.8.1