Patch 2 helps lei users avoid wasting bandwidth. I've been running patches 6 and 7 on https://yhbt.net/lore/ for a while, now; and memory use seems far lower. The rest are pretty minor things, I think. Eric Wong (7): lei: always pass $lei to LeiAuth->op_merge lei export-kw: skip read-only IMAP folders shared_kv: remove cache_size attribute support http: use a larger buffer for ->getline responses listener: emit warnings on EPERM thread: avoid Perl5 internal scratchpad target cache git: avoid Perl5 internal scratchpad target cache lib/PublicInbox/Git.pm | 3 ++- lib/PublicInbox/HTTP.pm | 2 +- lib/PublicInbox/LeiExportKw.pm | 14 ++++++++++++-- lib/PublicInbox/LeiForgetSearch.pm | 2 +- lib/PublicInbox/LeiImport.pm | 2 +- lib/PublicInbox/LeiLsMailSource.pm | 2 +- lib/PublicInbox/LeiMailDiff.pm | 2 +- lib/PublicInbox/LeiRefreshMailSync.pm | 2 +- lib/PublicInbox/LeiTag.pm | 2 +- lib/PublicInbox/LeiXSearch.pm | 2 +- lib/PublicInbox/Listener.pm | 8 ++------ lib/PublicInbox/NetWriter.pm | 9 +++++++++ lib/PublicInbox/SearchThread.pm | 22 +++++++++++----------- lib/PublicInbox/SearchView.pm | 4 ++-- lib/PublicInbox/SharedKV.pm | 3 --- lib/PublicInbox/View.pm | 4 ++-- t/lei-export-kw.t | 2 +- t/lei-import-imap.t | 3 +++ t/thread-cycle.t | 4 ++-- 19 files changed, 54 insertions(+), 38 deletions(-)
This will make future developments easier. --- lib/PublicInbox/LeiExportKw.pm | 2 +- lib/PublicInbox/LeiForgetSearch.pm | 2 +- lib/PublicInbox/LeiImport.pm | 2 +- lib/PublicInbox/LeiLsMailSource.pm | 2 +- lib/PublicInbox/LeiMailDiff.pm | 2 +- lib/PublicInbox/LeiRefreshMailSync.pm | 2 +- lib/PublicInbox/LeiTag.pm | 2 +- lib/PublicInbox/LeiXSearch.pm | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm index 5be9e51f..756d0e9c 100644 --- a/lib/PublicInbox/LeiExportKw.pm +++ b/lib/PublicInbox/LeiExportKw.pm @@ -115,7 +115,7 @@ EOM $self->{nwr}->{-skip_creat} = 1; } my $ops = {}; - $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; $lei->{-err_type} = 'non-fatal'; diff --git a/lib/PublicInbox/LeiForgetSearch.pm b/lib/PublicInbox/LeiForgetSearch.pm index f353fe52..dfeb0293 100644 --- a/lib/PublicInbox/LeiForgetSearch.pm +++ b/lib/PublicInbox/LeiForgetSearch.pm @@ -46,7 +46,7 @@ sub lei_forget_search { $self->prepare_inputs($lei, $self->{o_remote}) or return; } my $ops = {}; - $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; net_merge_all_done($self) unless $lei->{auth}; diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 2f8fd6c6..d8f39fdf 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -103,7 +103,7 @@ sub do_import_index ($$@) { ($lei->{opt}->{'new-only'} && (!$net || !$net->{imap_order})) and warn "# --new-only is only for IMAP\n"; my $ops = {}; - $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; $lei->{-eml_noisy} = 1; (my $op_c, $ops) = $lei->workers_start($self, $j, $ops); $lei->{wq1} = $self; diff --git a/lib/PublicInbox/LeiLsMailSource.pm b/lib/PublicInbox/LeiLsMailSource.pm index 7c3c0cda..5eb7032d 100644 --- a/lib/PublicInbox/LeiLsMailSource.pm +++ b/lib/PublicInbox/LeiLsMailSource.pm @@ -96,7 +96,7 @@ sub lei_ls_mail_source { } $lei->start_pager if $isatty; my $ops = {}; - $lei->{auth}->op_merge($ops, $self); + $lei->{auth}->op_merge($ops, $self, $lei); (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; $lei->{-err_type} = 'non-fatal'; diff --git a/lib/PublicInbox/LeiMailDiff.pm b/lib/PublicInbox/LeiMailDiff.pm index a29ae225..4f3a4608 100644 --- a/lib/PublicInbox/LeiMailDiff.pm +++ b/lib/PublicInbox/LeiMailDiff.pm @@ -83,7 +83,7 @@ sub lei_mail_diff { $lei->{opt}->{color} //= $isatty; $lei->start_pager if $isatty; my $ops = {}; - $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; $lei->{-err_type} = 'non-fatal'; diff --git a/lib/PublicInbox/LeiRefreshMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm index 0cb9f3dc..f516f572 100644 --- a/lib/PublicInbox/LeiRefreshMailSync.pm +++ b/lib/PublicInbox/LeiRefreshMailSync.pm @@ -82,7 +82,7 @@ EOM $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs $self->prepare_inputs($lei, \@folders) or return; my $ops = {}; - $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; $lei->{-err_type} = 'non-fatal'; diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm index 817b87f8..77654d1a 100644 --- a/lib/PublicInbox/LeiTag.pm +++ b/lib/PublicInbox/LeiTag.pm @@ -49,7 +49,7 @@ sub lei_tag { # the "lei tag" method grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or return $lei->fail('no keywords or labels specified'); my $ops = {}; - $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $lei->{auth}->op_merge($ops, $self, $lei) if $lei->{auth}; (my $op_c, $ops) = $lei->workers_start($self, 1, $ops); $lei->{wq1} = $self; $lei->{-err_type} = 'non-fatal'; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 119070a2..acc36897 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -548,7 +548,7 @@ sub do_query { 'child_error' => [ $lei ], 'incr_start_query' => [ $self, $lei ], }; - $lei->{auth}->op_merge($ops, $l2m) if $l2m && $lei->{auth}; + $lei->{auth}->op_merge($ops, $l2m, $lei) if $l2m && $lei->{auth}; my $end = $lei->pkt_op_pair; $lei->{1}->autoflush(1); $lei->start_pager if delete $lei->{need_pager};
Since we want to store IMAP flags asynchronously and not wait for results, we can't check for IMAP errors this way and end up wasting bandwidth on public-inbox-imapd. Now, we just check PERMANENTFLAGS up front to ensure a folder can handle IMAP flag storage before proceeding. --- lib/PublicInbox/LeiExportKw.pm | 12 +++++++++++- lib/PublicInbox/NetWriter.pm | 9 +++++++++ t/lei-export-kw.t | 2 +- t/lei-import-imap.t | 3 +++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm index 756d0e9c..0ecfb782 100644 --- a/lib/PublicInbox/LeiExportKw.pm +++ b/lib/PublicInbox/LeiExportKw.pm @@ -67,7 +67,17 @@ sub input_path_url { $self->{lms}->each_src($input, \&export_kw_md, $self, $mdir); } elsif ($input =~ m!\Aimaps?://!i) { my $uri = PublicInbox::URIimap->new($input); - if (my $mic = $self->{nwr}->mic_for_folder($uri)) { + my $mic = $self->{nwr}->mic_for_folder($uri); + if ($mic && !$self->{nwr}->can_store_flags($mic)) { + my $m = "$input does not support PERMANENTFLAGS"; + if (defined $self->{lei}->{opt}->{all}) { + $self->{lei}->qerr("# $m"); + } else { # set error code if user explicitly requested + $self->{lei}->child_error(0, "E: $m"); + } + return; + } + if ($mic) { $self->{lms}->each_src($$uri, \&export_kw_imap, $self, $mic); $mic->expunge; diff --git a/lib/PublicInbox/NetWriter.pm b/lib/PublicInbox/NetWriter.pm index 629a752a..4a1f34f6 100644 --- a/lib/PublicInbox/NetWriter.pm +++ b/lib/PublicInbox/NetWriter.pm @@ -56,4 +56,13 @@ sub imap_set_kw { $mic; # caller must ->expunge } +sub can_store_flags { + my ($self, $mic) = @_; + for ($mic->Results) { + /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and + return $self->can('perm_fl_ok')->($1); + } + undef; +} + 1; diff --git a/t/lei-export-kw.t b/t/lei-export-kw.t index 55730e87..88b2a80b 100644 --- a/t/lei-export-kw.t +++ b/t/lei-export-kw.t @@ -4,7 +4,7 @@ use strict; use v5.10.1; use PublicInbox::TestCommon; use File::Copy qw(cp); use File::Path qw(make_path); -require_mods(qw(lei -imapd Mail::IMAPClient)); +require_mods(qw(lei)); # see lei-import-imap.t for IMAP tests my ($tmpdir, $for_destroy) = tmpdir; my $expect = eml_load('t/data/0001.patch'); my $do_export_kw = 1; diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t index 315567b3..3b6cb299 100644 --- a/t/lei-import-imap.t +++ b/t/lei-import-imap.t @@ -110,6 +110,9 @@ test_lei({ tmpdir => $tmpdir }, sub { is(scalar(@$out), 2, 'got JSON') or diag explain($out); lei_ok qw(lcat), $url_orig; is($lei_out, $orig, 'lcat w/o UID works'); + + ok(!lei(qw(export-kw), $url_orig), 'export-kw fails on read-only IMAP'); + like($lei_err, qr/does not support/, 'error noted in failure'); }); done_testing;
We're not using it, anywhere. --- lib/PublicInbox/SharedKV.pm | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm index 398f4ca8..27407f83 100644 --- a/lib/PublicInbox/SharedKV.pm +++ b/lib/PublicInbox/SharedKV.pm @@ -27,9 +27,6 @@ sub dbh { }); my $opt = $self->{opt} // {}; $dbh->do('PRAGMA synchronous = OFF') if !$opt->{fsync}; - if (my $s = $opt->{cache_size}) { - $dbh->do("PRAGMA cache_size = $s"); - } $dbh->do('PRAGMA journal_mode = '. ($opt->{journal_mode} // 'WAL')); $dbh->do(<<'');
64K matches the Linux pipe default, and matches what we use in httpd/async and qspawn. This should reduce syscalls used for serving git packs via dumb HTTP and any ->getline code paths used by other PSGI code. This appears to speed up HTML rendering by w3m when serving giant HTML responsees from the Devel::Mwrap::PSGI memory debugger. --- lib/PublicInbox/HTTP.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index 18a19250..e65988be 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -235,7 +235,7 @@ sub getline_pull { # limit our own running time for fairness with other # clients and to avoid buffering too much: my $buf = eval { - local $/ = \8192; + local $/ = \65536; $forward->getline; } if $forward;
In retrospect, warnings for EPERM on accept4(2) failure may help detect misconfigured firewalls, so start emitting warnings for EPERM. Fwiw, I've never known excessive EPERM warnings to be excessively noisy in other TCP services I've run over the years. --- lib/PublicInbox/Listener.pm | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Listener.pm b/lib/PublicInbox/Listener.pm index 09f1f2e5..7cedc349 100644 --- a/lib/PublicInbox/Listener.pm +++ b/lib/PublicInbox/Listener.pm @@ -8,7 +8,7 @@ use parent 'PublicInbox::DS'; use Socket qw(SOL_SOCKET SO_KEEPALIVE IPPROTO_TCP TCP_NODELAY); use IO::Handle; use PublicInbox::Syscall qw(EPOLLIN EPOLLEXCLUSIVE); -use Errno qw(EAGAIN ECONNABORTED EPERM); +use Errno qw(EAGAIN ECONNABORTED); # Warn on transient errors, mostly resource limitations. # EINTR would indicate the failure to set NonBlocking in systemd or similar @@ -38,13 +38,9 @@ sub event_step { IO::Handle::blocking($c, 0); # no accept4 :< eval { $self->{post_accept}->($c, $addr, $sock) }; warn "E: $@\n" if $@; - } elsif ($! == EAGAIN || $! == ECONNABORTED || $! == EPERM) { + } elsif ($! == EAGAIN || $! == ECONNABORTED) { # EAGAIN is common and likely # ECONNABORTED is common with bad connections - # EPERM happens if firewall rules prevent a connection - # on Linux (and everything that emulates Linux). - # Firewall rules are sometimes intentional, so we don't - # warn on EPERM to avoid being too noisy... return; } elsif (my $sym = $ERR_WARN{int($!)}) { warn "W: accept(): $! ($sym)\n";
The use of array-returning built-ins such as `grep' inside arrayref declarations appears to result in permanently allocated scratchpad space for caching according to my malloc inspector. Thread skeletons get discarded every response, but multiple skeletons can exist in memory at once, so do what we can to prevent long-lived allocations from being made, here. In other words, replacing constructs such as: my $foo = [ grep(...) ]; with: my @foo = grep(...); Seems to ensure the mortality of the underlying array. --- lib/PublicInbox/SearchThread.pm | 22 +++++++++++----------- lib/PublicInbox/SearchView.pm | 4 ++-- lib/PublicInbox/View.pm | 4 ++-- t/thread-cycle.t | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/PublicInbox/SearchThread.pm b/lib/PublicInbox/SearchThread.pm index 507f25ba..f07dd696 100644 --- a/lib/PublicInbox/SearchThread.pm +++ b/lib/PublicInbox/SearchThread.pm @@ -83,15 +83,15 @@ sub thread { } } my $ibx = $ctx->{ibx}; - my $rootset = [ grep { # n.b.: delete prevents cyclic refs + my @rootset = grep { # n.b.: delete prevents cyclic refs !delete($_->{parent}) && $_->visible($ibx) - } values %id_table ]; - $rootset = $ordersub->($rootset); - $_->order_children($ordersub, $ctx) for @$rootset; + } values %id_table; + $ordersub->(\@rootset); + $_->order_children($ordersub, $ctx) for @rootset; # parent imposter messages with reused Message-IDs unshift(@{$id_table{$_->{mid}}->{children}}, $_) for @imposters; - $rootset; + \@rootset; } package PublicInbox::SearchThread::Msg; @@ -172,12 +172,12 @@ sub order_children { my @q = ($cur); my $ibx = $ctx->{ibx}; while (defined($cur = shift @q)) { - my $c = $cur->{children}; # The hashref here... - - $c = [ grep { !$seen{$_}++ && visible($_, $ibx) } values %$c ]; - $c = $ordersub->($c) if scalar @$c > 1; - $cur->{children} = $c; # ...becomes an arrayref - push @q, @$c; + # the {children} hashref here... + my @c = grep { !$seen{$_}++ && visible($_, $ibx) } + values %{$cur->{children}}; + $ordersub->(\@c) if scalar(@c) > 1; + $cur->{children} = \@c; # ...becomes an arrayref + push @q, @c; } } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index a42867c5..b1cdb480 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -274,10 +274,10 @@ sub search_nav_bot { # also used by WwwListing for searching extindex miscidx } sub sort_relevance { - [ sort { + @{$_[0]} = sort { (eval { $b->topmost->{pct} } // 0) <=> (eval { $a->topmost->{pct} } // 0) - } @{$_[0]} ] + } @{$_[0]}; } sub mset_thread { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 116aa641..2e9cf705 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -1073,10 +1073,10 @@ sub _skel_ghost { } sub sort_ds { - [ sort { + @{$_[0]} = sort { (eval { $a->topmost->{ds} } || 0) <=> (eval { $b->topmost->{ds} } || 0) - } @{$_[0]} ]; + } @{$_[0]}; } # accumulate recent topics if search is supported diff --git a/t/thread-cycle.t b/t/thread-cycle.t index e89b1846..1e5dfb51 100644 --- a/t/thread-cycle.t +++ b/t/thread-cycle.t @@ -108,7 +108,7 @@ SKIP: { eval 'package EmptyInbox; sub smsg_by_mid { undef }'; my $ctx = { ibx => bless {}, 'EmptyInbox' }; my $rootset = PublicInbox::SearchThread::thread($smsgs, sub { - [ sort { $a->{mid} cmp $b->{mid} } @{$_[0]} ] }, $ctx); + @{$_[0]} = sort { $a->{mid} cmp $b->{mid} } @{$_[0]} }, $ctx); my $oldout = select $fh; find_cycle($rootset); select $oldout; @@ -120,7 +120,7 @@ done_testing; sub thread_to_s { my ($msgs) = @_; my $rootset = PublicInbox::SearchThread::thread($msgs, sub { - [ sort { $a->{mid} cmp $b->{mid} } @{$_[0]} ] }); + @{$_[0]} = sort { $a->{mid} cmp $b->{mid} } @{$_[0]} }); my $st = ''; my @q = map { (0, $_) } @$rootset; while (@q) {
Creating a scalar ref directly off substr() seemed to be causing the underlying non-ref scalar to end up in Perl's scratchpad. Assign the substr result to a local variable seems sufficient to prevent multi-megabyte SVs from lingering indefinitely when a read-only daemon serves rare, oversized blobs. --- lib/PublicInbox/Git.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index e634ca55..9d2c5f75 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -158,7 +158,8 @@ sub my_read ($$$) { return; # unrecoverable error } } - \substr($$rbuf, 0, $len, ''); + my $no_pad = substr($$rbuf, 0, $len, ''); + \$no_pad; } sub my_readline ($$) {