From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 02/10] lei: support remote externals
Date: Sat, 23 Jan 2021 10:27:47 +0000 [thread overview]
Message-ID: <20210123102755.425-3-e@80x24.org> (raw)
In-Reply-To: <20210123102755.425-1-e@80x24.org>
Via curl(1), since that lets us easily use tor on a
per-connection basis via LD_PRELOAD (torsocks) or proxy.
We'll eventually support more curl options which can allow
users to get past firewalls and deal with other odd network
configurations.
---
lib/PublicInbox/LEI.pm | 19 ++++++++++--
lib/PublicInbox/LeiOverview.pm | 10 +++++-
lib/PublicInbox/LeiToMail.pm | 20 +++++++-----
lib/PublicInbox/LeiXSearch.pm | 57 +++++++++++++++++++++++++++++++++-
lib/PublicInbox/ProcessPipe.pm | 2 ++
script/lei | 2 ++
t/lei.t | 39 +++++++++++++++++++++++
7 files changed, 137 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ef3f90fc..f6bc920d 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -84,6 +84,7 @@ our %CMD = ( # sorted in order of importance/use:
'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
sort|s=s reverse|r offset=i remote local! external! pretty mua-cmd=s
+ verbose|v
since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ],
'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
@@ -278,6 +279,16 @@ sub fail ($$;$) {
undef;
}
+sub child_error { # passes non-fatal curl exit codes to user
+ my ($self, $child_error) = @_; # child_error is $?
+ if (my $sock = $self->{sock}) { # send to lei(1) client
+ send($sock, "child_error $child_error", MSG_EOR);
+ } else { # oneshot
+ $self->{child_error} = $child_error;
+ }
+ undef;
+}
+
sub atfork_prepare_wq {
my ($self, $wq) = @_;
my $tcafc = $wq->{-ipc_atfork_child_close} //= [ $listener // () ];
@@ -959,19 +970,21 @@ sub lazy_start {
exit($exit_code // 0);
}
-# for users w/o Socket::Msghdr
+# for users w/o Socket::Msghdr installed or Inline::C enabled
sub oneshot {
my ($main_pkg) = @_;
my $exit = $main_pkg->can('exit'); # caller may override exit()
local $quit = $exit if $exit;
local %PATH2CFG;
umask(077) // die("umask(077): $!");
- dispatch((bless {
+ my $self = bless {
0 => *STDIN{GLOB},
1 => *STDOUT{GLOB},
2 => *STDERR{GLOB},
env => \%ENV
- }, __PACKAGE__), @ARGV);
+ }, __PACKAGE__;
+ dispatch($self, @ARGV);
+ x_it($self, $self->{child_error}) if $self->{child_error};
}
# ensures stdout hits the FS before sock disconnects so a client
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 7a4fa857..49538a60 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -209,7 +209,15 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
$json->ascii(1) if $lei->{opt}->{ascii};
}
my $l2m = $lei->{l2m};
- if ($l2m && $l2m->{-wq_s1}) {
+ if ($l2m && $ibxish->can('scheme')) { # remote https?:// mboxrd
+ delete $l2m->{-wq_s1};
+ my $g2m = $l2m->can('git_to_mail');
+ my $wcb = $l2m->write_cb($lei);
+ sub {
+ my ($smsg, undef, $eml) = @_; # no mitem in $_[1]
+ $wcb->(undef, $smsg, $eml);
+ };
+ } elsif ($l2m && $l2m->{-wq_s1}) {
my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m);
# n.b. $io[0] = qry_status_wr, $io[1] = mbox|stdout,
# $io[4] becomes a notification pipe that triggers EOF
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index cea68319..43c59da0 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -251,9 +251,9 @@ sub _mbox_write_cb ($$) {
my $dedupe = $lei->{dedupe};
$dedupe->prepare_dedupe;
sub { # for git_to_mail
- my ($buf, $smsg) = @_;
+ my ($buf, $smsg, $eml) = @_;
return unless $out;
- my $eml = PublicInbox::Eml->new($buf);
+ $eml //= PublicInbox::Eml->new($buf);
if (!$dedupe->is_dup($eml, $smsg->{blob})) {
$buf = $eml2mbox->($eml, $smsg);
my $lk = $ovv->lock_for_scope;
@@ -286,18 +286,23 @@ sub _augment_file { # _maildir_each_file cb
# _maildir_each_file callback, \&CORE::unlink doesn't work with it
sub _unlink { unlink($_[0]) }
+sub _rand () {
+ state $seq = 0;
+ sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq);
+}
+
sub _buf2maildir {
my ($dst, $buf, $smsg) = @_;
my $kw = $smsg->{kw} // [];
my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw));
my $rand = ''; # chosen by die roll :P
my ($tmp, $fh, $final);
- my $common = $smsg->{blob};
+ my $common = $smsg->{blob} // _rand;
if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" }
do {
$tmp = $dst.'tmp/'.$rand.$common;
} while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY) &&
- $! == EEXIST && ($rand = int(rand 0x7fffffff).','));
+ $! == EEXIST && ($rand = _rand.','));
if (print $fh $$buf and close($fh)) {
# ignore new/ and write only to cur/, otherwise MUAs
# with R/W access to the Maildir will end up doing
@@ -308,7 +313,7 @@ sub _buf2maildir {
do {
$final = $dst.$rand.$common.':2,'.$sfx;
} while (!link($tmp, $final) && $! == EEXIST &&
- ($rand = int(rand 0x7fffffff).','));
+ ($rand = _rand.','));
unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
} else {
my $err = $!;
@@ -323,9 +328,10 @@ sub _maildir_write_cb ($$) {
$dedupe->prepare_dedupe;
my $dst = $lei->{ovv}->{dst};
sub { # for git_to_mail
- my ($buf, $smsg) = @_;
+ my ($buf, $smsg, $eml) = @_;
+ $buf //= \($eml->as_string);
return _buf2maildir($dst, $buf, $smsg) if !$dedupe;
- my $eml = PublicInbox::Eml->new($$buf); # copy buf
+ $eml //= PublicInbox::Eml->new($$buf); # copy buf
return if $dedupe->is_dup($eml, $smsg->{blob});
undef $eml;
_buf2maildir($dst, $buf, $smsg);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 10c25246..d32fe09a 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -14,6 +14,7 @@ use PublicInbox::Import;
use File::Temp 0.19 (); # 0.19 for ->newdir
use File::Spec ();
use PublicInbox::Search qw(xap_terms);
+use PublicInbox::Spawn qw(popen_rd);
sub new {
my ($class) = @_;
@@ -169,8 +170,58 @@ sub query_mset { # non-parallel for non-"--thread" users
$lei->{ovv}->ovv_atexit_child($lei);
}
+sub each_eml { # callback for MboxReader->mboxrd
+ my ($eml, $self, $lei, $each_smsg) = @_;
+ my $smsg = bless {}, 'PublicInbox::Smsg';
+ $smsg->populate($eml);
+ $smsg->{$_} //= '' for qw(from to cc ds subject references mid);
+ delete @$smsg{qw(From Subject -ds -ts)};
+ if (my $startq = delete($self->{5})) { wait_startq($startq) }
+ return if !$lei->{l2m} && $lei->{dedupe}->is_smsg_dup($smsg);
+ $each_smsg->($smsg, undef, $eml);
+}
+
sub query_remote_mboxrd {
my ($self, $lei, $uri) = @_;
+ local $0 = "$0 query_remote_mboxrd";
+ my %sig = $lei->atfork_child_wq($self); # keep $self->{5} startq
+ local @SIG{keys %sig} = values %sig;
+ my $opt = $lei->{opt};
+ $uri->query_form(q => $lei->{mset_opt}->{qstr}, x => 'm',
+ $opt->{thread} ? (t => 1) : ());
+ my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
+ my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
+ $dedupe->prepare_dedupe;
+ my @cmd = qw(curl -XPOST -sSf);
+ my $tor = $opt->{torsocks} //= 'auto';
+ if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+ (($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) {
+ unshift @cmd, 'torsocks';
+ } elsif (PublicInbox::Config::git_bool($tor)) {
+ unshift @cmd, 'torsocks';
+ }
+ my $verbose = $opt->{verbose};
+ push @cmd, '-v' if $verbose;
+ push @cmd, $uri->as_string;
+ $lei->err("# @cmd") if $verbose;
+ $? = 0;
+ my $fh = popen_rd(\@cmd, $lei->{env}, { 2 => $lei->{2} });
+ $fh = IO::Uncompress::Gunzip->new($fh);
+ eval {
+ PublicInbox::MboxReader->mboxrd($fh, \&each_eml,
+ $self, $lei, $each_smsg);
+ };
+ return $lei->fail("E: @cmd: $@") if $@;
+ if (($? >> 8) == 22) { # HTTP 404 from curl(1)
+ $uri->query_form(q => $lei->{mset_opt}->{qstr});
+ $lei->err('# no results from '.$uri->as_string);
+ } elsif ($?) {
+ $uri->query_form(q => $lei->{mset_opt}->{qstr});
+ $lei->err('E: '.$uri->as_string);
+ $lei->child_error($?);
+ }
+ undef $each_smsg;
+ $lei->{ovv}->ovv_atexit_child($lei);
}
sub git {
@@ -230,7 +281,6 @@ sub start_query { # always runs in main (lei-daemon) process
} else {
$self->wq_do('query_mset', $io, $lei);
}
- # TODO
for my $uri (remotes($self)) {
$self->wq_do('query_remote_mboxrd', $io, $lei, $uri);
}
@@ -263,6 +313,7 @@ sub do_query {
my ($lei, @io) = $lei_orig->atfork_parent_wq($self);
$io[0] = undef;
pipe(my $done, $io[0]) or die "pipe $!";
+ $lei_orig->{1}->autoflush(1);
$lei_orig->event_step_init; # wait for shutdowns
my $done_op = {
@@ -296,6 +347,10 @@ sub do_query {
sub ipc_atfork_prepare {
my ($self) = @_;
+ if (exists $self->{remotes}) {
+ require PublicInbox::MboxReader;
+ require IO::Uncompress::Gunzip;
+ }
# (0: done_wr, 1: stdout|mbox, 2: stderr,
# 3: sock, 4: $l2m->{-wq_s1}, 5: $startq)
$self->wq_set_recv_modes(qw[+<&= >&= >&= +<&= +<&= <&=]);
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
index e540dc22..97e9c268 100644
--- a/lib/PublicInbox/ProcessPipe.pm
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -13,6 +13,8 @@ sub TIEHANDLE {
$class;
}
+sub BINMODE { binmode(shift->{fh}) } # for IO::Uncompress::Gunzip
+
sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
sub READLINE { readline($_[0]->{fh}) }
diff --git a/script/lei b/script/lei
index 8dcea562..8c40bf12 100755
--- a/script/lei
+++ b/script/lei
@@ -93,6 +93,8 @@ Falling back to (slow) one-shot mode
if ($buf =~ /\Ax_it ([0-9]+)\z/) {
$x_it_code = $1 + 0;
last;
+ } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) {
+ $x_it_code = $1 + 0;
} elsif ($buf =~ /\Aexec (.+)\z/) {
exec_cmd(\@fds, split(/\0/, $1));
} else {
diff --git a/t/lei.t b/t/lei.t
index 50ad2bb1..6b45f5b7 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -8,11 +8,15 @@ use PublicInbox::TestCommon;
use PublicInbox::Config;
use File::Path qw(rmtree);
use Fcntl qw(SEEK_SET);
+use PublicInbox::Spawn qw(which);
require_git 2.6;
require_mods(qw(json DBD::SQLite Search::Xapian));
my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
my ($home, $for_destroy) = tmpdir();
my $err_filter;
+my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/
+ http://czquwvybam4bgbro.onion/meta/
+ http://ou63pmih66umazou.onion/meta/);
my $lei = sub {
my ($cmd, $env, $xopt) = @_;
$out = $err = '';
@@ -155,6 +159,32 @@ my $setup_publicinboxes = sub {
$seen || BAIL_OUT 'no imports';
};
+my $test_external_remote = sub {
+ my ($url, $k) = @_;
+SKIP: {
+ my $nr = 4;
+ skip "$k unset", $nr if !$url;
+ which('curl') or skip 'no curl', $nr;
+ which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!;
+ $lei->('ls-external');
+ for my $e (split(/^/ms, $out)) {
+ $e =~ s/\s+boost.*//s;
+ $lei->('forget-external', '-q', $e) or
+ fail "error forgetting $e: $err"
+ }
+ $lei->('add-external', $url);
+ my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
+ ok($lei->('q', "m:$mid"), "query $url");
+ is($err, '', "no errors on $url");
+ my $res = PublicInbox::Config->json->decode($out);
+ is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
+ ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
+ like($err, qr/404/, 'noted 404');
+ is($out, "[null]\n", 'got null results');
+ $lei->('forget-external', $url);
+} # /SKIP
+}; # /sub
+
my $test_external = sub {
$setup_publicinboxes->();
$cleanup->();
@@ -243,6 +273,15 @@ my $test_external = sub {
}
ok(!$lei->('q', '-o', "$home/mbox", 's:nope'),
'fails if mbox format unspecified');
+ my %e = (
+ TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/',
+ TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))],
+ );
+ for my $k (keys %e) {
+ my $url = $ENV{$k} // '';
+ $url = $e{$k} if $url eq '1';
+ $test_external_remote->($url, $k);
+ }
};
my $test_lei_common = sub {
next prev parent reply other threads:[~2021-01-23 10:27 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
2021-01-23 10:27 ` [PATCH 01/10] lei: move external vivification to xsearch Eric Wong
2021-01-23 10:27 ` Eric Wong [this message]
2021-01-24 6:01 ` [PATCH 02/10] lei: support remote externals Kyle Meyer
2021-01-24 12:02 ` Eric Wong
2021-01-24 12:12 ` Eric Wong
2021-01-24 22:11 ` Kyle Meyer
2021-01-25 18:37 ` Eric Wong
2021-01-23 10:27 ` [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC Eric Wong
2021-01-23 10:27 ` [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox Eric Wong
2021-01-23 10:27 ` [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs Eric Wong
2021-01-23 10:27 ` [PATCH 06/10] lei completion: handle URLs with port numbers Eric Wong
2021-01-23 10:27 ` [PATCH 07/10] lei forget-external: just show the location Eric Wong
2021-01-23 10:27 ` [PATCH 08/10] lei q: support a bunch of curl(1) options Eric Wong
2021-01-23 10:27 ` [PATCH 09/10] lei forget-external: don't show redundant "not found" Eric Wong
2021-01-23 10:27 ` [PATCH 10/10] lei add-external: don't allow non-existent directories Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210123102755.425-3-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).