From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.8 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8A2601F4B4 for ; Fri, 11 Dec 2020 03:37:40 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] extindex: preliminary --reindex support Date: Fri, 11 Dec 2020 03:37:40 +0000 Message-Id: <20201211033740.24568-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: --reindex allows us to catch missed and stale messages due to -extindex vs -index races prior to commit 02b2fcc46f364b51 ("extsearchidx: enforce -index before -extindex"). We'll also rely on reindex to internally deal with v1/v2 inbox removals and partial-unindexing of messages which are only removed from one inbox out of many. This reindex design is completely different than how normal v1/v2 inbox reindex operates due to extindex having multiple histories to work with. Instead of scanning git history, this relies exclusively on comparing over.sqlite3 contents between the v1/v2 inboxes and the extindex. Changes to Xapian behavior also get picked up, now. Xapian indexing is handled by workers with minimal IPC to the parent process. This results in more read I/O but fewer writes when dealing with cross-posted messages. Changes to $smsg->populate and --rethread still need further work. --- lib/PublicInbox/ExtSearchIdx.pm | 195 ++++++++++++++++++++++++++++-- lib/PublicInbox/OverIdx.pm | 23 ++++ lib/PublicInbox/SearchIdx.pm | 77 +++++++++++- lib/PublicInbox/SearchIdxShard.pm | 11 ++ lib/PublicInbox/V2Writable.pm | 6 +- t/extsearch.t | 62 +++++++++- 6 files changed, 357 insertions(+), 17 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 84449cb4..394a89d4 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -29,6 +29,7 @@ use PublicInbox::InboxWritable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; use File::Spec; +use DBI qw(:sql_types); # SQL_BLOB sub new { my (undef, $dir, $opt) = @_; @@ -123,9 +124,11 @@ sub do_xpost ($$) { my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key, \$rm_eidx_info); if ($nr == 0) { + $self->{oidx}->eidxq_del($docid); $idx->shard_remove($docid); } elsif ($rm_eidx_info) { $idx->shard_remove_eidx_info($docid, $eidx_key, $eml); + $self->{oidx}->eidxq_add($docid); # yes, add } } } @@ -168,7 +171,7 @@ sub do_finalize ($) { sub do_step ($) { # main iterator for adding messages to the index my ($req) = @_; - my $self = $req->{self}; + my $self = $req->{self} // die 'BUG: {self} missing'; while (1) { if (my $next_arg = $req->{next_arg}) { if (my $smsg = $self->{oidx}->next_by_mid(@$next_arg)) { @@ -311,7 +314,7 @@ sub _sync_inbox ($$$) { $ibx->git->cleanup; # done with this inbox, now } -sub unref_doc ($$$$) { +sub gc_unref_doc ($$$$) { my ($self, $ibx_id, $eidx_key, $docid) = @_; my $dbh = $self->{oidx}->dbh; @@ -326,15 +329,14 @@ SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ? DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? my $remain = $self->{oidx}->get_xref3($docid); - my $idx = $self->idx_shard($docid); - if (@$remain) { + if (scalar(@$remain)) { + $self->{oidx}->eidxq_add($docid); # enqueue for reindex for my $oid (@oid) { warn "I: unref #$docid $eidx_key $oid\n"; - $idx->shard_remove_eidx_info($docid, $eidx_key); } } else { warn "I: remove #$docid $eidx_key @oid\n"; - $idx->shard_remove($docid); + $self->idx_shard($docid)->shard_remove($docid); } } @@ -356,7 +358,7 @@ sub eidx_gc { warn "I: deleting messages for $eidx_key...\n"; $x3_doc->execute($ibx_id); while (defined(my $docid = $x3_doc->fetchrow_array)) { - unref_doc($self, $ibx_id, $eidx_key, $docid); + gc_unref_doc($self, $ibx_id, $eidx_key, $docid); } $dbh->prepare_cached(<<'')->execute($ibx_id); DELETE FROM inboxes WHERE ibx_id = ? @@ -393,20 +395,187 @@ DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over) done($self); } +sub eidxq_process ($$) { # for reindexing + my ($self, $sync) = @_; + + $self->{oidx}->commit_lazy; # ensure shard workers can see it + $self->{oidx}->begin_lazy; + my $dbh = $self->{oidx}->dbh; + my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return; + ${$sync->{nr}} = 0; + $sync->{-regen_fmt} = "%u/$tot\n"; + my $pr = $sync->{-opt}->{-progress}; + if ($pr) { + my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq'); + my $max = $dbh->selectrow_array('SELECT MAX(docid) FROM eidxq'); + $pr->("Xapian indexing $min..$max (total=$tot)\n"); + } + + my $del = $dbh->prepare('DELETE FROM eidxq WHERE docid = ?'); + my $iter = $dbh->prepare('SELECT docid FROM eidxq ORDER BY docid ASC'); + $iter->execute; + while (defined(my $docid = $iter->fetchrow_array)) { + $self->idx_shard($docid)->shard_reindex_docid($docid); + $del->execute($docid); + last if $sync->{quit}; + my $cur = ++${$sync->{nr}}; + + # shards flush on their own, just don't queue up too many + # deletes + if (($cur % 1000) == 0) { + $self->{oidx}->commit_lazy; + $self->{oidx}->begin_lazy; + $pr->("reindexed $cur/$tot\n") if $pr; + } + # this is only for SIGUSR1, shards do their own accounting: + reindex_checkpoint($self, $sync) if ${$sync->{need_checkpoint}}; + } + $pr->("reindexed ${$sync->{nr}}/$tot\n") if $pr; + $self->{oidx}->commit_lazy; + $self->{oidx}->begin_lazy; +} + +sub _reindex_unseen { # git->cat_async callback + my ($bref, $oid, $type, $size, $req) = @_; + return if is_bad_blob($oid, $type, $size, $req->{oid}); + my $self = $req->{self} // die 'BUG: {self} unset'; + local $self->{current_info} = "$self->{current_info} $oid"; + my $new_smsg = bless { blob => $oid, }, 'PublicInbox::Smsg'; + $new_smsg->{bytes} = $size + crlf_adjust($$bref); + my $eml = $req->{eml} = PublicInbox::Eml->new($bref); + $req->{new_smsg} = $new_smsg; + $req->{chash} = content_hash($eml); + $req->{mids} = mids($eml); # do_step iterates through this + do_step($req); # enter the normal indexing flow +} + +# --reindex may catch totally unseen messages, this handles them +sub reindex_unseen ($$$$) { + my ($self, $sync, $ibx, $xsmsg) = @_; + my $req = { + %$sync, # has {self} + autime => $xsmsg->{ds}, + cotime => $xsmsg->{ts}, + oid => $xsmsg->{blob}, + ibx => $ibx, + xnum => $xsmsg->{num}, + # {mids} and {chash} will be filled in at _reindex_unseen + }; + warn "I: reindex_unseen ${\$ibx->eidx_key}:$req->{xnum}:$req->{oid}\n"; + $self->git->cat_async($xsmsg->{blob}, \&_reindex_unseen, $req); +} + +sub _reindex_check_unseen ($$$) { + my ($self, $sync, $ibx) = @_; + my $ibx_id = $ibx->{-ibx_id}; + my ($beg, $end) = (1, 1000); + + # first, check if we missed any messages in target $ibx + my $inx3 = $self->{oidx}->dbh->prepare(<<''); +SELECT DISTINCT(docid) FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ? + + my $msgs; + while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) { + $beg = $msgs->[-1]->{num} + 1; + $end = $beg + 1000; + for my $xsmsg (@$msgs) { + my $oidbin = pack('H*', $xsmsg->{blob}); + $inx3->bind_param(1, $ibx_id); + $inx3->bind_param(2, $xsmsg->{num}); + $inx3->bind_param(3, $oidbin, SQL_BLOB); + $inx3->execute; + my $docids = $inx3->fetchall_arrayref; + # index messages which were totally missed + # the first time around ASAP: + if (scalar(@$docids) == 0) { + reindex_unseen($self, $sync, $ibx, $xsmsg); + } else { # already seen, reindex later + for my $r (@$docids) { + $self->{oidx}->eidxq_add($r->[0]); + } + } + last if $sync->{quit}; + } + last if $sync->{quit}; + } +} + +sub _reindex_check_stale ($$$) { + my ($self, $sync, $ibx) = @_; + + # now, check if there's stale xrefs + my $get_xnum = $self->{oidx}->dbh->prepare(<<''); +SELECT docid,xnum,oidbin FROM xref3 WHERE ibx_id = ? ORDER BY docid ASC + + $get_xnum->execute($ibx->{-ibx_id}); + my $del_xref3 = $self->{oidx}->dbh->prepare(<<''); +DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ? + + while (my ($docid, $xnum, $oidbin) = $get_xnum->fetchrow_array) { + last if $sync->{quit}; + my $smsg = $ibx->over->get_art($xnum); + my $oidhex = unpack('H*', $oidbin); + my $err; + if (!$smsg) { + $err = 'stale'; + } elsif ($smsg->{blob} ne $oidhex) { + $err = "mismatch (!= $smsg->{blob})"; + } else { + next; # likely, all good + } + warn $ibx->eidx_key . ":$xnum:$oidhex (#$docid): $err\n"; + $del_xref3->bind_param(1, $ibx->{-ibx_id}); + $del_xref3->bind_param(2, $xnum); + $del_xref3->bind_param(3, $oidbin, SQL_BLOB); + $del_xref3->execute; + + # get_xref3 over-fetches, but this is a rare path: + my $xr3 = $self->{oidx}->get_xref3($docid); + my $idx = $self->idx_shard($docid); + if (scalar(@$xr3) == 0) { # all gone + $self->{oidx}->eidxq_del($docid); + $idx->shard_remove($docid); + } else { # enqueue for reindex of remaining messages + $idx->shard_remove_eidx_info($docid, $ibx->eidx_key); + $self->{oidx}->eidxq_add($docid); # yes, add + } + } +} + +sub _reindex_inbox ($$$) { + my ($self, $sync, $ibx) = @_; + _reindex_check_unseen($self, $sync, $ibx); + _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit}; + delete @$ibx{qw(over mm search git)}; # won't need these for a bit +} + +sub eidx_reindex { + my ($self, $sync) = @_; + + for my $ibx (@{$self->{ibx_list}}) { + _reindex_inbox($self, $sync, $ibx); + last if $sync->{quit}; + } + $self->git->async_wait_all; # ensure eidxq gets filled completely + eidxq_process($self, $sync) unless $sync->{quit}; +} + sub eidx_sync { # main entry point my ($self, $opt) = @_; - $self->idx_init($opt); # acquire lock via V2Writable::_idx_init - $self->{oidx}->rethread_prepare($opt); my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; local $self->{current_info} = ''; local $SIG{__WARN__} = sub { $warn_cb->($self->{current_info}, ': ', @_); }; + $self->idx_init($opt); # acquire lock via V2Writable::_idx_init + $self->{oidx}->rethread_prepare($opt); my $sync = { need_checkpoint => \(my $need_checkpoint = 0), - reindex => $opt->{reindex}, -opt => $opt, + # DO NOT SET {reindex} here, it's incompatible with reused + # V2Writable code, reindex is totally different here + # compared to v1/v2 inboxes because we have multiple histories self => $self, -regen_fmt => "%u/?\n", }; @@ -415,6 +584,10 @@ sub eidx_sync { # main entry point local $SIG{QUIT} = $quit; local $SIG{INT} = $quit; local $SIG{TERM} = $quit; + for my $ibx (@{$self->{ibx_list}}) { + $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key); + } + eidx_reindex($self, $sync) if delete($opt->{reindex}); # don't use $_ here, it'll get clobbered by reindex_checkpoint for my $ibx (@{$self->{ibx_list}}) { @@ -422,6 +595,7 @@ sub eidx_sync { # main entry point _sync_inbox($self, $sync, $ibx); } $self->{oidx}->rethread_done($opt) unless $sync->{quit}; + eidxq_process($self, $sync) unless $sync->{quit}; PublicInbox::V2Writable::done($self); } @@ -522,5 +696,6 @@ no warnings 'once'; *count_shards = \&PublicInbox::V2Writable::count_shards; *atfork_child = \&PublicInbox::V2Writable::atfork_child; *idx_shard = \&PublicInbox::V2Writable::idx_shard; +*reindex_checkpoint = \&PublicInbox::V2Writable::reindex_checkpoint; 1; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 38552247..4a39bf53 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -563,6 +563,15 @@ CREATE TABLE IF NOT EXISTS eidx_meta ( val VARCHAR(255) NOT NULL ) + # A queue of current docids which need reindexing. + # eidxq persists across aborted -extindex invocations + # Currently used for "-extindex --reindex" for Xapian + # data, but may be used in more places down the line. + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS eidxq ( + docid INTEGER PRIMARY KEY NOT NULL +) + $dbh; }; } @@ -661,4 +670,18 @@ UPDATE over SET ddd = ? WHERE num = ? $sth->execute; } +sub eidxq_add { + my ($self, $docid) = @_; + $self->dbh->prepare_cached(<<'')->execute($docid); +INSERT OR IGNORE INTO eidxq (docid) VALUES (?) + +} + +sub eidxq_del { + my ($self, $docid) = @_; + $self->dbh->prepare_cached(<<'')->execute($docid); +DELETE FROM eidxq WHERE docid = ? + +} + 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 0fbe6560..cd8f4dd7 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -352,8 +352,9 @@ sub index_ids ($$$$) { index_list_id($self, $doc, $hdr); } -sub add_xapian ($$$$) { +sub eml2doc ($$$;$) { my ($self, $eml, $smsg, $mids) = @_; + $mids //= mids_for_index($eml); my $doc = $X->{Document}->new; add_val($doc, PublicInbox::Search::TS(), $smsg->{ts}); my @ds = gmtime($smsg->{ds}); @@ -396,6 +397,12 @@ sub add_xapian ($$$$) { } } } + $doc; +} + +sub add_xapian ($$$$) { + my ($self, $eml, $smsg, $mids) = @_; + my $doc = eml2doc($self, $eml, $smsg, $mids); $self->{xdb}->replace_document($smsg->{num}, $doc); } @@ -941,6 +948,10 @@ sub set_metadata_once { sub _commit_txn { my ($self) = @_; + if (my $eidx = $self->{eidx}) { + $eidx->git->async_wait_all; + $eidx->{transact_bytes} = 0; + } if (my $xdb = $self->{xdb}) { set_metadata_once($self); $xdb->commit_transaction; @@ -997,4 +1008,68 @@ SELECT COUNT(*) FROM over WHERE num = ? } } +sub reindex_xap { # git->cat_async callback + my ($bref, $oid, $type, $size, $ary) = @_; + my ($ibx_id, $oidhex, $req, $more) = @$ary; + my $self = $req->{self} // die 'BUG: {self} missing'; + my $eidx = $self->{eidx} // die 'BUG: {eidx} missing'; + my $eidx_key = $self->{-eidx_key_for}->{$ibx_id} // + die "BUG: bad ibx_id=$ibx_id ($oid)"; + + my $docid = $req->{docid}; + local $eidx->{current_info} = "#$docid $oid"; + return if is_bad_blob($oid, $type, $size, $oidhex); + if (my $doc = $req->{doc}) { # modify existing doc + $req->{tg_isset} //= do { # for existing documents in {xdb} + term_generator($self)->set_document($doc); + 1; + }; + $doc->add_boolean_term('O'.$eidx_key); + index_list_id($self, $doc, PublicInbox::Eml->new($bref)); + } else { # first time seeing this doc + my $smsg = $self->{eidx}->over->get_art($docid) // + die "BUG: #$docid ($oid) not in over"; + $smsg->{bytes} = $size + crlf_adjust($$bref); + $smsg->{eidx_key} = $eidx_key; + my $eml = PublicInbox::Eml->new($bref); + $req->{doc} = eml2doc($self, $eml, $smsg); + $req->{tg_isset} = 1; # eml2doc calls $tg->set_document + } + return if $more; + my $doc = delete($req->{doc}) or return; # all bad blobs! + $eidx->{transact_bytes} += $size; + $self->{xdb}->replace_document($req->{docid}, $doc); +} + +sub reindex_docid { + my ($self, $docid) = @_; + my $eidx = $self->{eidx} // die 'BUG: {eidx} missing'; + my $eidx_key_for = $self->{-eidx_key_for} //= do { + my %eidx_key_for = map { + $_->[0] => $_->[1]; + } @{$eidx->over->dbh->selectall_arrayref(<<'')}; +SELECT ibx_id,eidx_key FROM inboxes + + \%eidx_key_for; + }; + + begin_txn_lazy($self); + my $doc = eval { $self->{xdb}->get_document($docid) }; + my $req = { doc => $doc, self => $self, docid => $docid }; + my $sth = $eidx->over->dbh->prepare_cached(<<'', undef, 1); +SELECT ibx_id,oidbin FROM xref3 WHERE docid = ? ORDER BY ibx_id ASC + + $sth->execute($docid); + my $rows = $sth->fetchall_arrayref; + while (my $row = shift(@$rows)) { + my ($ibx_id, $oidbin) = @$row; + my $oidhex = unpack('H*', $oidbin); + $eidx->git->cat_async($oidhex, \&reindex_xap, + [ $ibx_id, $oidhex, $req, scalar(@$rows) ]); + } + if ($eidx->{transact_bytes} >= $eidx->{batch_bytes}) { + commit_txn_lazy($self); + } +} + 1; diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 579ed196..b6eef6bd 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -94,6 +94,8 @@ sub shard_worker_loop ($$$$$) { my $over_fn = $1; $over_fn =~ tr/\0/\n/; $self->over_check(PublicInbox::Over->new($over_fn)); + } elsif ($line =~ /\AE ([0-9]+)\n/) { + $self->reindex_docid($1 + 0); } else { chomp $line; my $eidx_key; @@ -223,4 +225,13 @@ sub shard_over_check { } } +sub shard_reindex_docid { + my ($self, $docid) = @_; + if (my $w = $self->{w}) { + print $w "E $docid\n" or die "failed to write to shard: $!"; + } else { + $self->reindex_docid($docid); + } +} + 1; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index bef3a67a..572eb418 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -275,13 +275,17 @@ sub _idx_init { # with_umask callback $self->{shards} = $nshards if $nshards && $nshards != $self->{shards}; $self->{batch_bytes} = $opt->{batch_size} // $PublicInbox::SearchIdx::BATCH_BYTES; - $self->{batch_bytes} *= $self->{shards} if $self->{parallel}; # need to create all shards before initializing msgmap FD # idx_shards must be visible to all forked processes my $max = $self->{shards} - 1; my $idx = $self->{idx_shards} = []; push @$idx, PublicInbox::SearchIdxShard->new($self, $_) for (0..$max); + + # SearchIdxShard may do their own flushing, so don't scale + # until after forking + $self->{batch_bytes} *= $self->{shards} if $self->{parallel}; + my $ibx = $self->{ibx} or return; # ExtIdxSearch # Now that all subprocesses are up, we can open the FDs diff --git a/t/extsearch.t b/t/extsearch.t index 70a60b5a..f16ec0d1 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -5,6 +5,7 @@ use strict; use Test::More; use PublicInbox::TestCommon; use PublicInbox::Config; +use PublicInbox::Search; use Fcntl qw(:seek); my $json = PublicInbox::Config::json() or plan skip_all => 'JSON missing'; require_git(2.6); @@ -101,7 +102,7 @@ my $es = PublicInbox::ExtSearch->new("$home/extindex"); is(scalar(@$xref3), 2, 'only to entries'); } -{ +if ('inbox edited') { my ($in, $out, $err); $in = $out = $err = ''; my $opt = { 0 => \$in, 1 => \$out, 2 => \$err }; @@ -176,13 +177,13 @@ is(scalar(@it), 2, 'two inboxes'); like($it[0]->get_document->get_data, qr/v2test/, 'docdata matched v2'); like($it[1]->get_document->get_data, qr/v1test/, 'docdata matched v1'); +my $cfg = PublicInbox::Config->new; +my $schema_version = PublicInbox::Search::SCHEMA_VERSION(); if ('inject w/o indexing') { use PublicInbox::Import; - use PublicInbox::Search; - my $schema_version = PublicInbox::Search::SCHEMA_VERSION(); - my $v1ibx = PublicInbox::Config->new->lookup_name('v1test'); + my $v1ibx = $cfg->lookup_name('v1test'); my $last_v1_commit = $v1ibx->mm->last_commit; - my $v2ibx = PublicInbox::Config->new->lookup_name('v2test'); + my $v2ibx = $cfg->lookup_name('v2test'); my $last_v2_commit = $v2ibx->mm->last_commit_xap($schema_version, 0); my $git0 = PublicInbox::Git->new("$v2ibx->{inboxdir}/git/0.git"); chomp(my $cmt = $git0->qx(qw(rev-parse HEAD^0))); @@ -230,6 +231,57 @@ if ('inject w/o indexing') { is($mset->size, 1, 'got v2 message'); } +if ('reindex catches missed messages') { + use PublicInbox::InboxWritable; + use PublicInbox::OverIdx; + my $v2ibx = $cfg->lookup_name('v2test'); + my $im = PublicInbox::InboxWritable->new($v2ibx)->importer(0); + my $cmt_a = $v2ibx->mm->last_commit_xap($schema_version, 0); + my $eml = eml_load('t/data/0001.patch'); + $im->add($eml); + $im->done; + my $cmt_b = $v2ibx->mm->last_commit_xap($schema_version, 0); + isnt($cmt_a, $cmt_b, 'v2 0.git HEAD updated'); + my $f = "$home/extindex/ei$schema_version/over.sqlite3"; + my $oidx = PublicInbox::OverIdx->new($f); + $oidx->dbh; + my $uv = $v2ibx->uidvalidity; + my $lc_key = "lc-v2:v2.example//$uv;0"; + is($oidx->eidx_meta($lc_key, $cmt_b), $cmt_a, + 'update lc-v2 meta, old is as expected'); + my $max = $oidx->max; + $oidx->dbh_close; + ok(run_script([qw(-extindex), "$home/extindex", $v2ibx->{inboxdir}]), + '-extindex noop'); + is($oidx->max, $max, '->max unchanged'); + is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 unchanged'); + $oidx->dbh_close; + my $opt = { 2 => \(my $err = '') }; + ok(run_script([qw(-extindex --reindex), "$home/extindex", + $v2ibx->{inboxdir}], undef, $opt), + '--reindex for unseen'); + is($oidx->max, $max + 1, '->max bumped'); + is($oidx->eidx_meta($lc_key), $cmt_b, 'lc-v2 stays unchanged'); + my @err = split(/^/, $err); + is(scalar(@err), 1, 'only one warning'); + like($err[0], qr/I: reindex_unseen/, 'got reindex_unseen message'); + my $new = $oidx->get_art($max + 1); + is($new->{subject}, $eml->header('Subject'), 'new message added'); + + ok($im->remove($eml), 'remove new message from v2 inbox'); + $im->done; + my $cmt_c = $v2ibx->mm->last_commit_xap($schema_version, 0); + is($oidx->eidx_meta($lc_key, $cmt_c), $cmt_b, + 'bump lc-v2 meta again to skip v2 remove'); + $err = ''; + ok(run_script([qw(-extindex --reindex), "$home/extindex", + $v2ibx->{inboxdir}], undef, $opt), + '--reindex for stale'); + @err = split(/^/, $err); + is(scalar(@err), 1, 'only one warning'); + like($err[0], qr/\(#$new->{num}\): stale/, 'got stale message warning'); +} + if ('remove v1test and test gc') { xsys([qw(git config --unset publicinbox.v1test.inboxdir)], { GIT_CONFIG => $cfg_path });