* [PATCH 1/6] cindex: drop unneeded module use
2023-04-25 11:02 [PATCH 0/6] cleanups and error message consistency Eric Wong
@ 2023-04-25 11:02 ` Eric Wong
2023-04-25 11:02 ` [PATCH 2/6] cindex: simplify tmpfile management for indexing Eric Wong
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-25 11:02 UTC (permalink / raw)
To: meta
I initially thought I'd use the PublicInbox::Eml module and rely
on --pretty=mboxrd; but eventually decided against it since
it wasn't saving any code.
---
lib/PublicInbox/CodeSearchIdx.pm | 2 --
1 file changed, 2 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 440c537e..428a40fe 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -16,11 +16,9 @@ package PublicInbox::CodeSearchIdx;
use v5.12;
# parent order matters, we want ->DESTROY from IPC, not SearchIdx
use parent qw(PublicInbox::CodeSearch PublicInbox::IPC PublicInbox::SearchIdx);
-use PublicInbox::Eml;
use PublicInbox::DS qw(awaitpid);
use PublicInbox::PktOp;
use PublicInbox::IPC qw(nproc_shards);
-use PublicInbox::Admin;
use POSIX qw(WNOHANG SEEK_SET);
use File::Path ();
use File::Spec ();
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/6] cindex: simplify tmpfile management for indexing
2023-04-25 11:02 [PATCH 0/6] cleanups and error message consistency Eric Wong
2023-04-25 11:02 ` [PATCH 1/6] cindex: drop unneeded module use Eric Wong
@ 2023-04-25 11:02 ` Eric Wong
2023-04-25 11:02 ` [PATCH 3/6] cindex: simplify store_repo Eric Wong
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-25 11:02 UTC (permalink / raw)
To: meta
I considered making this a pipe, but we must avoid spawning
`git log --stdin --no-walk=unsorted' for the no-op case since that
still emits a commit if stdin is empty. So just get rid of an
unnecessary loop and do lseek(2) inside workers for parallelism
---
lib/PublicInbox/CodeSearchIdx.pm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 428a40fe..9aefa331 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -226,6 +226,7 @@ sub shard_index { # via wq_io_do in IDX_SHARDS
my $in = delete($self->{0}) // die 'BUG: no {0} input';
my $op_p = delete($self->{1}) // die 'BUG: no {1} op_p';
+ sysseek($in, 0, SEEK_SET) or die "seek: $!";
my ($rd, $pid) = $git->popen(@LOG_STDIN, undef, { 0 => $in });
close $in or die "close: $!";
awaitpid($pid, \&cidx_reap_log, $self, $op_p);
@@ -452,10 +453,6 @@ sub partition_refs ($$$) {
if (!$? || (($? & 127) == POSIX::SIGPIPE && $seen > $SEEN_MAX)) {
my $n = $NCHANGE - $n0;
progress($self, "$git->{git_dir}: $n commits") if $n;
- for my $fh (@shard_in) {
- $fh->flush or die "flush: $!";
- sysseek($fh, 0, SEEK_SET) or die "seek: $!";
- }
return @shard_in;
}
die "git --git-dir=$git->{git_dir} rev-list: \$?=$?\n";
@@ -542,6 +539,7 @@ sub index_repo { # cidx_await cb
my ($c, $p) = PublicInbox::PktOp->pair;
$c->{ops}->{shard_done} = [ $self, $repo_ctx, $commit_shard ];
for my $n (0..$#shard_in) {
+ $shard_in[$n]->flush or die "flush shard[$n]: $!";
-s $shard_in[$n] or next;
last if $DO_QUIT;
$IDX_SHARDS[$n]->wq_io_do('shard_index',
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/6] cindex: simplify store_repo
2023-04-25 11:02 [PATCH 0/6] cleanups and error message consistency Eric Wong
2023-04-25 11:02 ` [PATCH 1/6] cindex: drop unneeded module use Eric Wong
2023-04-25 11:02 ` [PATCH 2/6] cindex: simplify tmpfile management for indexing Eric Wong
@ 2023-04-25 11:02 ` Eric Wong
2023-04-25 11:02 ` [PATCH 4/6] searchidxshard: use BUG error messages more consistently Eric Wong
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-25 11:02 UTC (permalink / raw)
To: meta
It's easier to just create a new Xapian::Document and
replace it rather than to load and edit it. I don't
know if there's any performance difference one way or
the other, but fewer branches helps with maintainability
and smaller optree size to lower memory use and startup
speed.
---
lib/PublicInbox/CodeSearchIdx.pm | 33 +++++++++++---------------------
1 file changed, 11 insertions(+), 22 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 9aefa331..671134bf 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -146,29 +146,18 @@ sub progress {
sub store_repo { # wq_do - returns docid
my ($self, $repo) = @_;
$self->begin_txn_lazy;
- my $xdb = $self->{xdb};
- for (@{$repo->{to_delete}}) { $xdb->delete_document($_) } # XXX needed?
- if (defined $repo->{docid}) {
- my $doc = $self->get_doc($repo->{docid}) //
- die "$repo->{git_dir} doc #$repo->{docid} gone";
- add_val($doc, PublicInbox::CodeSearch::CT, $repo->{ct});
- my %new = map { $_ => undef } @{$repo->{roots}};
- my $old = xap_terms('G', $doc);
- delete @new{keys %$old};
- $doc->add_boolean_term('G'.$_) for keys %new;
- delete @$old{@{$repo->{roots}}};
- $doc->remove_term('G'.$_) for keys %$old;
- $doc->set_data($repo->{fp});
- $xdb->replace_document($repo->{docid}, $doc);
- $repo->{docid}
+ $self->{xdb}->delete_document($_) for @{$repo->{to_delete}};
+ my $doc = $PublicInbox::Search::X{Document}->new;
+ add_val($doc, PublicInbox::CodeSearch::CT, $repo->{ct});
+ $doc->add_boolean_term("P$repo->{git_dir}");
+ $doc->add_boolean_term('T'.'r');
+ $doc->add_boolean_term('G'.$_) for @{$repo->{roots}};
+ $doc->set_data($repo->{fp}); # \n delimited
+ if ($repo->{docid}) {
+ $self->{xdb}->replace_document($repo->{docid}, $doc);
+ $repo->{docid};
} else {
- my $new = $PublicInbox::Search::X{Document}->new;
- add_val($new, PublicInbox::CodeSearch::CT, $repo->{ct});
- $new->add_boolean_term("P$repo->{git_dir}");
- $new->add_boolean_term('T'.'r');
- $new->add_boolean_term('G'.$_) for @{$repo->{roots}};
- $new->set_data($repo->{fp}); # \n delimited
- $xdb->add_document($new);
+ $self->{xdb}->add_document($doc);
}
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/6] searchidxshard: use BUG error messages more consistently
2023-04-25 11:02 [PATCH 0/6] cleanups and error message consistency Eric Wong
` (2 preceding siblings ...)
2023-04-25 11:02 ` [PATCH 3/6] cindex: simplify store_repo Eric Wong
@ 2023-04-25 11:02 ` Eric Wong
2023-04-25 11:02 ` [PATCH 5/6] searchidx: reduce short-lived variables for TermGenerator Eric Wong
2023-04-25 11:02 ` [PATCH 6/6] emergency: make error messages more consistent Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-25 11:02 UTC (permalink / raw)
To: meta
We'll also drop the "\n" for die() to make diagnostics easier.
There's no known bugs in this area, just consistency
improvements and LoC reduction.
---
lib/PublicInbox/SearchIdxShard.pm | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 831be51b..21bd56c2 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -32,12 +32,10 @@ sub new {
$self;
}
-sub _worker_done {
+sub _worker_done { # OnDestroy cb
my ($self) = @_;
- if ($self->need_xapian) {
- die "$$ $0 xdb not released\n" if $self->{xdb};
- }
- die "$$ $0 still in transaction\n" if $self->{txn};
+ die "BUG: $$ $0 xdb active" if $self->need_xapian && $self->{xdb};
+ die "BUG: $$ $0 txn active" if $self->{txn};
}
sub ipc_atfork_child { # called automatically before ipc_worker_loop
@@ -64,7 +62,7 @@ sub echo {
sub idx_close {
my ($self) = @_;
- die "transaction in progress $self\n" if $self->{txn};
+ die "BUG: $$ $0 txn active" if $self->{txn};
$self->idx_release if $self->{xdb};
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/6] searchidx: reduce short-lived variables for TermGenerator
2023-04-25 11:02 [PATCH 0/6] cleanups and error message consistency Eric Wong
` (3 preceding siblings ...)
2023-04-25 11:02 ` [PATCH 4/6] searchidxshard: use BUG error messages more consistently Eric Wong
@ 2023-04-25 11:02 ` Eric Wong
2023-04-25 11:02 ` [PATCH 6/6] emergency: make error messages more consistent Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-25 11:02 UTC (permalink / raw)
To: meta
We can avoid needless refcount traffic in some cases.
---
lib/PublicInbox/SearchIdx.pm | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 3415cce4..cfc2d544 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -178,9 +178,8 @@ sub term_generator ($) { # write-only
sub index_phrase ($$$$) {
my ($self, $text, $wdf_inc, $prefix) = @_;
- my $tg = term_generator($self);
- $tg->index_text($text, $wdf_inc, $prefix);
- $tg->increase_termpos;
+ term_generator($self)->index_text($text, $wdf_inc, $prefix);
+ $self->{term_generator}->increase_termpos;
}
sub index_text ($$$$) {
@@ -189,8 +188,8 @@ sub index_text ($$$$) {
if ($self->{indexlevel} eq 'full') {
index_phrase($self, $text, $wdf_inc, $prefix);
} else {
- my $tg = term_generator($self);
- $tg->index_text_without_positions($text, $wdf_inc, $prefix);
+ term_generator($self)->index_text_without_positions(
+ $text, $wdf_inc, $prefix);
}
}
@@ -457,8 +456,7 @@ sub eml2doc ($$$;$) {
add_val($doc, PublicInbox::Search::UID(), $smsg->{num});
add_val($doc, PublicInbox::Search::THREADID, $smsg->{tid});
- my $tg = term_generator($self);
- $tg->set_document($doc);
+ term_generator($self)->set_document($doc);
index_headers($self, $smsg);
if (defined(my $eidx_key = $smsg->{eidx_key})) {
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/6] emergency: make error messages more consistent
2023-04-25 11:02 [PATCH 0/6] cleanups and error message consistency Eric Wong
` (4 preceding siblings ...)
2023-04-25 11:02 ` [PATCH 5/6] searchidx: reduce short-lived variables for TermGenerator Eric Wong
@ 2023-04-25 11:02 ` Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-25 11:02 UTC (permalink / raw)
To: meta
Showing "failed" is needless if we already know the program
is die-ing. We'll prefix "BUG:" to bug messages, "W:" to
non-fatal warnings to be consistent with our newer code such
as lei.
---
lib/PublicInbox/Emergency.pm | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/Emergency.pm b/lib/PublicInbox/Emergency.pm
index 56c58dd1..74a4069a 100644
--- a/lib/PublicInbox/Emergency.pm
+++ b/lib/PublicInbox/Emergency.pm
@@ -1,10 +1,9 @@
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Emergency Maildir delivery for MDA
package PublicInbox::Emergency;
-use strict;
-use v5.10.1;
+use v5.12;
use Fcntl qw(:DEFAULT SEEK_SET);
use Sys::Hostname qw(hostname);
use IO::Handle; # ->flush
@@ -35,14 +34,14 @@ sub prepare {
my ($self, $strref) = @_;
my $pid = $$;
my $tmp_key = "tmp.$pid";
- die "already in transaction: $self->{$tmp_key}" if $self->{$tmp_key};
+ die "BUG: in transaction: $self->{$tmp_key}" if $self->{$tmp_key};
my ($tmp, $fh);
do {
$tmp = _fn_in($self, $pid, 'tmp');
$! = undef;
} while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_RDWR) and $! == EEXIST);
- print $fh $$strref or die "write failed: $!";
- $fh->flush or die "flush failed: $!";
+ print $fh $$strref or die "print: $!";
+ $fh->flush or die "flush: $!";
$self->{fh} = $fh;
$self->{$tmp_key} = $tmp;
}
@@ -51,15 +50,15 @@ sub abort {
my ($self) = @_;
delete $self->{fh};
my $tmp = delete $self->{"tmp.$$"} or return;
- unlink($tmp) or warn "Failed to unlink $tmp: $!";
+ unlink($tmp) or warn "W: unlink($tmp): $!";
undef;
}
sub fh {
my ($self) = @_;
- my $fh = $self->{fh} or die "{fh} not open!\n";
- seek($fh, 0, SEEK_SET) or die "seek(fh) failed: $!";
- sysseek($fh, 0, SEEK_SET) or die "sysseek(fh) failed: $!";
+ my $fh = $self->{fh} or die "BUG: {fh} not open";
+ seek($fh, 0, SEEK_SET) or die "seek: $!";
+ sysseek($fh, 0, SEEK_SET) or die "sysseek: $!";
$fh;
}
@@ -73,7 +72,7 @@ sub commit {
$new = _fn_in($self, $pid, 'new');
} while (!($ok = link($tmp, $new)) && $! == EEXIST);
die "link($tmp, $new): $!" unless $ok;
- unlink($tmp) or warn "Failed to unlink $tmp: $!";
+ unlink($tmp) or warn "W: unlink($tmp): $!";
}
sub DESTROY { commit($_[0]) }
^ permalink raw reply related [flat|nested] 7+ messages in thread