unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/3] search improvements
@ 2017-06-14  0:14 Eric Wong
  2017-06-14  0:14 ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-14  0:14 UTC (permalink / raw)
  To: meta

These have been sitting in the stalled "repobrowse" branch for
a bit.  I think they can be tracked into "master", first; since
I'm leaning towards splitting repobrowse into a separate project
at the moment.

Eric Wong (3):
      search: remove unnecessary abstractions and functionality
      searchidx: switch to accounting by message bytes
      search: allow searching within mail diffs

 lib/PublicInbox/Search.pm    |  51 ++++++------
 lib/PublicInbox/SearchIdx.pm | 180 +++++++++++++++++++++++++++++++++++++------
 lib/PublicInbox/SearchMsg.pm |   2 +-
 t/search.t                   |   9 +--
 4 files changed, 183 insertions(+), 59 deletions(-)


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] search: remove unnecessary abstractions and functionality
  2017-06-14  0:14 [PATCH 0/3] search improvements Eric Wong
@ 2017-06-14  0:14 ` Eric Wong
  2017-06-15 23:11   ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
  2017-06-14  0:14 ` [PATCH 2/3] searchidx: switch to accounting by message bytes Eric Wong
  2017-06-14  0:14 ` [PATCH 3/3] search: allow searching within mail diffs Eric Wong
  2 siblings, 1 reply; 5+ messages in thread
From: Eric Wong @ 2017-06-14  0:14 UTC (permalink / raw)
  To: meta

This simplifies the code a bit and reduces the translation
overhead for looking directly at data from tools shipped
with Xapian.

While we're at it, fix thread-all.t :)
---
 lib/PublicInbox/Search.pm    | 31 +++++++++----------------------
 lib/PublicInbox/SearchIdx.pm | 20 +++++++++-----------
 lib/PublicInbox/SearchMsg.pm |  2 +-
 t/search.t                   |  9 +--------
 4 files changed, 20 insertions(+), 42 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 82a6e54..67837f4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -56,8 +56,6 @@ my %bool_pfx_internal = (
 );
 
 my %bool_pfx_external = (
-	# do we still need these? probably not..
-	path => 'XPATH',
 	mid => 'Q', # uniQue id (Message-ID)
 );
 
@@ -107,11 +105,7 @@ chomp @HELP;
 # da (diff a/ removed lines)
 # db (diff b/ added lines)
 
-my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
-
-sub xpfx { $all_pfx{$_[0]} }
-
-my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
+my $mail_query = Search::Xapian::Query->new('T' . 'mail');
 
 sub xdir {
 	my (undef, $git_dir) = @_;
@@ -146,11 +140,11 @@ sub get_thread {
 	my $smsg = eval { $self->lookup_message($mid) };
 
 	return { total => 0, msgs => [] } unless $smsg;
-	my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id);
+	my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
 	my $path = $smsg->path;
 	if (defined $path && $path ne '') {
 		my $path = id_compress($smsg->path);
-		my $qsub = Search::Xapian::Query->new(xpfx('path').$path);
+		my $qsub = Search::Xapian::Query->new('XPATH' . $path);
 		$qtid = Search::Xapian::Query->new(OP_OR, $qtid, $qsub);
 	}
 	$opts ||= {};
@@ -279,7 +273,7 @@ sub lookup_message {
 	my ($self, $mid) = @_;
 	$mid = mid_clean($mid);
 
-	my $doc_id = $self->find_unique_doc_id('mid', $mid);
+	my $doc_id = $self->find_unique_doc_id('Q' . $mid);
 	my $smsg;
 	if (defined $doc_id) {
 		# raises on error:
@@ -299,9 +293,9 @@ sub lookup_mail { # no ghosts!
 }
 
 sub find_unique_doc_id {
-	my ($self, $term, $value) = @_;
+	my ($self, $termval) = @_;
 
-	my ($begin, $end) = $self->find_doc_ids($term, $value);
+	my ($begin, $end) = $self->find_doc_ids($termval);
 
 	return undef if $begin->equal($end); # not found
 
@@ -309,23 +303,16 @@ sub find_unique_doc_id {
 
 	# sanity check
 	$begin->inc;
-	$begin->equal($end) or die "Term '$term:$value' is not unique\n";
+	$begin->equal($end) or die "Term '$termval' is not unique\n";
 	$rv;
 }
 
 # returns begin and end PostingIterator
 sub find_doc_ids {
-	my ($self, $term, $value) = @_;
-
-	$self->find_doc_ids_for_term(xpfx($term) . $value);
-}
-
-# returns begin and end PostingIterator
-sub find_doc_ids_for_term {
-	my ($self, $term) = @_;
+	my ($self, $termval) = @_;
 	my $db = $self->{xdb};
 
-	($db->postlist_begin($term), $db->postlist_end($term));
+	($db->postlist_begin($termval), $db->postlist_end($termval));
 }
 
 # normalize subjects so they are suitable as pathnames for URLs
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fd0d320..316111b 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -19,7 +19,6 @@ use PublicInbox::MsgIter;
 use Carp qw(croak);
 use POSIX qw(strftime);
 require PublicInbox::Git;
-*xpfx = *PublicInbox::Search::xpfx;
 
 use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
 use constant {
@@ -160,12 +159,12 @@ sub add_message {
 		}
 		$smsg = PublicInbox::SearchMsg->new($mime);
 		my $doc = $smsg->{doc};
-		$doc->add_term(xpfx('mid') . $mid);
+		$doc->add_term('Q' . $mid);
 
 		my $subj = $smsg->subject;
 		if ($subj ne '') {
 			my $path = $self->subject_path($subj);
-			$doc->add_term(xpfx('path') . id_compress($path));
+			$doc->add_term('XPATH' . id_compress($path));
 		}
 
 		add_values($smsg, $bytes, $num);
@@ -332,7 +331,7 @@ sub link_message {
 	} else {
 		$tid = defined $old_tid ? $old_tid : $self->next_thread_id;
 	}
-	$doc->add_term(xpfx('thread') . $tid);
+	$doc->add_term('G' . $tid);
 }
 
 sub index_blob {
@@ -542,9 +541,9 @@ sub create_ghost {
 
 	my $tid = $self->next_thread_id;
 	my $doc = Search::Xapian::Document->new;
-	$doc->add_term(xpfx('mid') . $mid);
-	$doc->add_term(xpfx('thread') . $tid);
-	$doc->add_term(xpfx('type') . 'ghost');
+	$doc->add_term('Q' . $mid);
+	$doc->add_term('G' . $tid);
+	$doc->add_term('T' . 'ghost');
 
 	my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
 	$self->{xdb}->add_document($doc);
@@ -555,15 +554,14 @@ sub create_ghost {
 sub merge_threads {
 	my ($self, $winner_tid, $loser_tid) = @_;
 	return if $winner_tid == $loser_tid;
-	my ($head, $tail) = $self->find_doc_ids('thread', $loser_tid);
-	my $thread_pfx = xpfx('thread');
+	my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid);
 	my $db = $self->{xdb};
 
 	for (; $head != $tail; $head->inc) {
 		my $docid = $head->get_docid;
 		my $doc = $db->get_document($docid);
-		$doc->remove_term($thread_pfx . $loser_tid);
-		$doc->add_term($thread_pfx . $winner_tid);
+		$doc->remove_term('G' . $loser_tid);
+		$doc->add_term('G' . $winner_tid);
 		$db->replace_document($docid, $doc);
 	}
 }
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b8eee66..a19d45d 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -14,7 +14,7 @@ use PublicInbox::Address;
 sub new {
 	my ($class, $mime) = @_;
 	my $doc = Search::Xapian::Document->new;
-	$doc->add_term(PublicInbox::Search::xpfx('type') . 'mail');
+	$doc->add_term('T' . 'mail');
 
 	bless { type => 'mail', doc => $doc, mime => $mime }, $class;
 }
diff --git a/t/search.t b/t/search.t
index c9c4e34..a75dc9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -95,15 +95,8 @@ sub filter_mids {
 	is($found->mid, 'root@s', 'mid set correctly');
 	ok(int($found->thread_id) > 0, 'thread_id is an integer');
 
+	my ($res, @res);
 	my @exp = sort qw(root@s last@s);
-	my $res = $ro->query("path:hello_world");
-	my @res = filter_mids($res);
-	is_deeply(\@res, \@exp, 'got expected results for path: match');
-
-	foreach my $p (qw(hello hello_ hello_world2 hello_world_)) {
-		$res = $ro->query("path:$p");
-		is($res->{total}, 0, "path variant `$p' does not match");
-	}
 
 	$res = $ro->query('s:(Hello world)');
 	@res = filter_mids($res);
-- 
EW


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] searchidx: switch to accounting by message bytes
  2017-06-14  0:14 [PATCH 0/3] search improvements Eric Wong
  2017-06-14  0:14 ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
@ 2017-06-14  0:14 ` Eric Wong
  2017-06-14  0:14 ` [PATCH 3/3] search: allow searching within mail diffs Eric Wong
  2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-14  0:14 UTC (permalink / raw)
  To: meta

Xapian memory usage is tied to the size of the indexed
text, so take the raw message size into account when
deciding when to flush Xapian data.

More importantly, we now flush Xapian before we have it
buffer beyond our maximum; and we do it unconditionally
to prevent even high priority processes from OOM-ing.
---
 lib/PublicInbox/SearchIdx.pm | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 316111b..30d3fe9 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -20,13 +20,14 @@ use Carp qw(croak);
 use POSIX qw(strftime);
 require PublicInbox::Git;
 
-use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
 use constant {
+	MAX_MID_SIZE => 244, # max term size - 1 in Xapian
 	PERM_UMASK => 0,
 	OLD_PERM_GROUP => 1,
 	OLD_PERM_EVERYBODY => 2,
 	PERM_GROUP => 0660,
 	PERM_EVERYBODY => 0664,
+	BATCH_BYTES => 1_000_000,
 };
 
 sub new {
@@ -71,7 +72,6 @@ sub _xdb_acquire {
 		require File::Path;
 		_lock_acquire($self);
 		File::Path::mkpath($dir);
-		$self->{batch_size} = 100;
 		$flag = Search::Xapian::DB_CREATE_OR_OPEN;
 	}
 	$self->{xdb} = Search::Xapian::WritableDatabase->new($dir, $flag);
@@ -395,6 +395,15 @@ sub index_sync {
 	with_umask($self, sub { $self->_index_sync($opts) });
 }
 
+sub batch_adjust ($$$$) {
+	my ($max, $bytes, $batch_cb, $latest) = @_;
+	$$max -= $bytes;
+	if ($$max <= 0) {
+		$$max = BATCH_BYTES;
+		$batch_cb->($latest, 1);
+	}
+}
+
 sub rlog {
 	my ($self, $log, $add_cb, $del_cb, $batch_cb) = @_;
 	my $hex = '[a-f0-9]';
@@ -404,23 +413,21 @@ sub rlog {
 	my $git = $self->{git};
 	my $latest;
 	my $bytes;
-	my $max = $self->{batch_size}; # may be undef
+	my $max = BATCH_BYTES;
 	local $/ = "\n";
 	my $line;
 	while (defined($line = <$log>)) {
 		if ($line =~ /$addmsg/o) {
 			my $blob = $1;
 			my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+			batch_adjust(\$max, $bytes, $batch_cb, $latest);
 			$add_cb->($self, $mime, $bytes, $blob);
 		} elsif ($line =~ /$delmsg/o) {
 			my $blob = $1;
-			my $mime = do_cat_mail($git, $blob) or next;
+			my $mime = do_cat_mail($git, $blob, \$bytes) or next;
+			batch_adjust(\$max, $bytes, $batch_cb, $latest);
 			$del_cb->($self, $mime);
 		} elsif ($line =~ /^commit ($h40)/o) {
-			if (defined $max && --$max <= 0) {
-				$max = $self->{batch_size};
-				$batch_cb->($latest, 1);
-			}
 			$latest = $1;
 		}
 	}
-- 
EW


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] search: allow searching within mail diffs
  2017-06-14  0:14 [PATCH 0/3] search improvements Eric Wong
  2017-06-14  0:14 ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
  2017-06-14  0:14 ` [PATCH 2/3] searchidx: switch to accounting by message bytes Eric Wong
@ 2017-06-14  0:14 ` Eric Wong
  2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-14  0:14 UTC (permalink / raw)
  To: meta

This can be tied into a repository browser to browse
in-flight topics on a mailing list.
---
 lib/PublicInbox/Search.pm    |  20 +++++--
 lib/PublicInbox/SearchIdx.pm | 137 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 67837f4..c7c5455 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -75,6 +75,14 @@ my %prob_prefix = (
 
 	q => 'XQUOT',
 	nq => 'XNQ',
+	dfn => 'XDFN',
+	dfa => 'XDFA',
+	dfb => 'XDFB',
+	dfhh => 'XDFHH',
+	dfctx => 'XDFCTX',
+	dfpre => 'XDFPRE',
+	dfpost => 'XDFPOST',
+	dfblob => 'XDFPRE XDFPOST',
 
 	# default:
 	'' => 'XMID S A XNQ XQUOT XFN',
@@ -98,12 +106,16 @@ EOF
 	'a:' => 'match within the To, Cc, and From headers',
 	'tc:' => 'match within the To and Cc headers',
 	'bs:' => 'match within the Subject and body',
+	'dfn:' => 'match filename from diff',
+	'dfa:' => 'match diff removed (-) lines',
+	'dfb:' => 'match diff added (+) lines',
+	'dfhh:' => 'match diff hunk header context (usually a function name)',
+	'dfctx:' => 'match diff context lines',
+	'dfpre:' => 'match pre-image git blob ID',
+	'dfpost:' => 'match post-image git blob ID',
+	'dfblob:' => 'match either pre or post-image git blob ID',
 );
 chomp @HELP;
-# TODO:
-# df (filenames from diff)
-# da (diff a/ removed lines)
-# db (diff b/ added lines)
 
 my $mail_query = Search::Xapian::Query->new('T' . 'mail');
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 30d3fe9..9ba9437 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -28,8 +28,28 @@ use constant {
 	PERM_GROUP => 0660,
 	PERM_EVERYBODY => 0664,
 	BATCH_BYTES => 1_000_000,
+	DEBUG => !!$ENV{DEBUG},
 };
 
+my %GIT_ESC = (
+	a => "\a",
+	b => "\b",
+	f => "\f",
+	n => "\n",
+	r => "\r",
+	t => "\t",
+	v => "\013",
+);
+
+sub git_unquote ($) {
+	my ($s) = @_;
+	return $s unless ($s =~ /\A"(.*)"\z/);
+	$s = $1;
+	$s =~ s/\\([abfnrtv])/$GIT_ESC{$1}/g;
+	$s =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;
+	$s;
+}
+
 sub new {
 	my ($class, $inbox, $creat) = @_;
 	my $git_dir = $inbox;
@@ -134,11 +154,108 @@ sub index_users ($$) {
 	$tg->increase_termpos;
 }
 
+sub index_text_inc ($$$) {
+	my ($tg, $text, $pfx) = @_;
+	$tg->index_text($text, 1, $pfx);
+	$tg->increase_termpos;
+}
+
+sub index_old_diff_fn {
+	my ($tg, $seen, $fa, $fb) = @_;
+
+	# no renames or space support for traditional diffs,
+	# find the number of leading common paths to strip:
+	my @fa = split('/', $fa);
+	my @fb = split('/', $fb);
+	while (scalar(@fa) && scalar(@fb)) {
+		$fa = join('/', @fa);
+		$fb = join('/', @fb);
+		if ($fa eq $fb) {
+			index_text_inc($tg, $fa,'XDFN') unless $seen->{$fa}++;
+			return 1;
+		}
+		shift @fa;
+		shift @fb;
+	}
+	0;
+}
+
+sub index_diff ($$$) {
+	my ($tg, $lines, $doc) = @_;
+	my %seen;
+	my $in_diff;
+	foreach (@$lines) {
+		if ($in_diff && s/^ //) { # diff context
+			index_text_inc($tg, $_, 'XDFCTX');
+		} elsif (/^-- $/) { # email signature begins
+			$in_diff = undef;
+		} elsif (m!^diff --git ("?a/.+) ("?b/.+)\z!) {
+			my ($fa, $fb) = ($1, $2);
+			my $fn = (split('/', git_unquote($fa), 2))[1];
+			index_text_inc($tg, $fn, 'XDFN') unless $seen{$fn}++;
+			$fn = (split('/', git_unquote($fb), 2))[1];
+			index_text_inc($tg, $fn, 'XDFN') unless $seen{$fn}++;
+			$in_diff = 1;
+		# traditional diff:
+		} elsif (m/^diff -(.+) (\S+) (\S+)$/) {
+			my ($opt, $fa, $fb) = ($1, $2, $3);
+			# only support unified:
+			next unless $opt =~ /[uU]/;
+			$in_diff = index_old_diff_fn($tg, \%seen, $fa, $fb);
+		} elsif (m!^--- ("?a/.+)!) {
+			my $fn = (split('/', git_unquote($1), 2))[1];
+			index_text_inc($tg, $fn, 'XDFN') unless $seen{$fn}++;
+			$in_diff = 1;
+		} elsif (m!^\+\+\+ ("?b/.+)!)  {
+			my $fn = (split('/', git_unquote($1), 2))[1];
+			index_text_inc($tg, $fn, 'XDFN') unless $seen{$fn}++;
+			$in_diff = 1;
+		} elsif (/^--- (\S+)/) {
+			$in_diff = $1;
+		} elsif (defined $in_diff && /^\+\+\+ (\S+)/) {
+			$in_diff = index_old_diff_fn($tg, \%seen, $in_diff, $1);
+		} elsif ($in_diff && s/^\+//) { # diff added
+			index_text_inc($tg, $_, 'XDFB');
+		} elsif ($in_diff && s/^-//) { # diff removed
+			index_text_inc($tg, $_, 'XDFA');
+		} elsif (m!^index ([a-f0-9]+)\.\.([a-f0-9]+)!) {
+			my ($ba, $bb) = ($1, $2);
+			index_git_blob_id($doc, 'XDFPRE', $ba);
+			index_git_blob_id($doc, 'XDFPOST', $bb);
+			$in_diff = 1;
+		} elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) {
+			# traditional diff w/o -p
+		} elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)$/) {
+			# hunk header context
+			index_text_inc($tg, $1, 'XDFHH');
+		# ignore the following lines:
+		} elsif (/^(?:dis)similarity index/) {
+		} elsif (/^(?:old|new) mode/) {
+		} elsif (/^(?:deleted|new) file mode/) {
+		} elsif (/^(?:copy|rename) (?:from|to) /) {
+		} elsif (/^(?:dis)?similarity index /) {
+		} elsif (/^\\ No newline at end of file/) {
+		} elsif (/^Binary files .* differ/) {
+		} elsif ($_ eq '') {
+			$in_diff = undef;
+		} else {
+			warn "non-diff line: $_\n" if DEBUG && $_ ne '';
+			$in_diff = undef;
+		}
+	}
+}
+
 sub index_body ($$$) {
-	my ($tg, $lines, $inc) = @_;
-	$tg->index_text(join("\n", @$lines), $inc, $inc ? 'XNQ' : 'XQUOT');
-	@$lines = ();
+	my ($tg, $lines, $doc) = @_;
+	my $txt = join("\n", @$lines);
+	$tg->index_text($txt, !!$doc, $doc ? 'XNQ' : 'XQUOT');
 	$tg->increase_termpos;
+	# does it look like a diff?
+	if ($doc && $txt =~ /^(?:diff|---|\+\+\+) /ms) {
+		$txt = undef;
+		index_diff($tg, $lines, $doc);
+	}
+	@$lines = ();
 }
 
 sub add_message {
@@ -205,7 +322,7 @@ sub add_message {
 			my @lines = split(/\n/, $body);
 			while (defined(my $l = shift @lines)) {
 				if ($l =~ /^>/) {
-					index_body($tg, \@orig, 1) if @orig;
+					index_body($tg, \@orig, $doc) if @orig;
 					push @quot, $l;
 				} else {
 					index_body($tg, \@quot, 0) if @quot;
@@ -213,7 +330,7 @@ sub add_message {
 				}
 			}
 			index_body($tg, \@quot, 0) if @quot;
-			index_body($tg, \@orig, 1) if @orig;
+			index_body($tg, \@orig, $doc) if @orig;
 		});
 
 		link_message($self, $smsg, $old_tid);
@@ -339,6 +456,16 @@ sub index_blob {
 	$self->add_message($mime, $bytes, $num, $blob);
 }
 
+sub index_git_blob_id {
+	my ($doc, $pfx, $objid) = @_;
+
+	my $len = length($objid);
+	for (my $len = length($objid); $len >= 7; ) {
+		$doc->add_term($pfx.$objid);
+		$objid = substr($objid, 0, --$len);
+	}
+}
+
 sub unindex_blob {
 	my ($self, $mime) = @_;
 	my $mid = eval { mid_clean(mid_mime($mime)) };
-- 
EW


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/3] searchidx: remove messages correctly from Xapian index
  2017-06-14  0:14 ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
@ 2017-06-15 23:11   ` Eric Wong
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-15 23:11 UTC (permalink / raw)
  To: meta

This fixes a bug introduced in
commit 7eeadcb62729b0efbcb53cd9b7b181897c92cf9a
("search: remove unnecessary abstractions and functionality")
---
 lib/PublicInbox/SearchIdx.pm | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 9ba9437..69b7a6f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -366,8 +366,12 @@ sub remove_message {
 	$mid = mid_clean($mid);
 
 	eval {
-		$doc_id = $self->find_unique_doc_id('mid', $mid);
-		$db->delete_document($doc_id) if defined $doc_id;
+		$doc_id = $self->find_unique_doc_id('Q' . $mid);
+		if (defined $doc_id) {
+			$db->delete_document($doc_id);
+		} else {
+			warn "cannot remove non-existent <$mid>\n";
+		}
 	};
 
 	if ($@) {
-- 
EW

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-06-15 23:11 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-06-14  0:14 [PATCH 0/3] search improvements Eric Wong
2017-06-14  0:14 ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
2017-06-15 23:11   ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
2017-06-14  0:14 ` [PATCH 2/3] searchidx: switch to accounting by message bytes Eric Wong
2017-06-14  0:14 ` [PATCH 3/3] search: allow searching within mail diffs Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).