unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 02/10] lei: support remote externals
Date: Sat, 23 Jan 2021 10:27:47 +0000	[thread overview]
Message-ID: <20210123102755.425-3-e@80x24.org> (raw)
In-Reply-To: <20210123102755.425-1-e@80x24.org>

Remote externals are queried via curl(1), since that lets us easily
use tor on a per-connection basis via LD_PRELOAD (torsocks) or a proxy.
We'll eventually support more curl options which can allow
users to get past firewalls and deal with other odd network
configurations.
---
 lib/PublicInbox/LEI.pm         | 19 ++++++++++--
 lib/PublicInbox/LeiOverview.pm | 10 +++++-
 lib/PublicInbox/LeiToMail.pm   | 20 +++++++-----
 lib/PublicInbox/LeiXSearch.pm  | 57 +++++++++++++++++++++++++++++++++-
 lib/PublicInbox/ProcessPipe.pm |  2 ++
 script/lei                     |  2 ++
 t/lei.t                        | 39 +++++++++++++++++++++++
 7 files changed, 137 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ef3f90fc..f6bc920d 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -84,6 +84,7 @@ our %CMD = ( # sorted in order of importance/use:
 'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
 	sort|s=s reverse|r offset=i remote local! external! pretty mua-cmd=s
+	verbose|v
 	since|after=s until|before=s), opt_dash('limit|n=i', '[0-9]+') ],
 
 'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)',
@@ -278,6 +279,16 @@ sub fail ($$;$) {
 	undef;
 }
 
+sub child_error { # passes non-fatal curl exit codes to user
+	my ($self, $child_error) = @_; # child_error is $?
+	if (my $sock = $self->{sock}) { # send to lei(1) client
+		send($sock, "child_error $child_error", MSG_EOR);
+	} else { # oneshot
+		$self->{child_error} = $child_error;
+	}
+	undef;
+}
+
 sub atfork_prepare_wq {
 	my ($self, $wq) = @_;
 	my $tcafc = $wq->{-ipc_atfork_child_close} //= [ $listener // () ];
@@ -959,19 +970,21 @@ sub lazy_start {
 	exit($exit_code // 0);
 }
 
-# for users w/o Socket::Msghdr
+# for users w/o Socket::Msghdr installed or Inline::C enabled
 sub oneshot {
 	my ($main_pkg) = @_;
 	my $exit = $main_pkg->can('exit'); # caller may override exit()
 	local $quit = $exit if $exit;
 	local %PATH2CFG;
 	umask(077) // die("umask(077): $!");
-	dispatch((bless {
+	my $self = bless {
 		0 => *STDIN{GLOB},
 		1 => *STDOUT{GLOB},
 		2 => *STDERR{GLOB},
 		env => \%ENV
-	}, __PACKAGE__), @ARGV);
+	}, __PACKAGE__;
+	dispatch($self, @ARGV);
+	x_it($self, $self->{child_error}) if $self->{child_error};
 }
 
 # ensures stdout hits the FS before sock disconnects so a client
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 7a4fa857..49538a60 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -209,7 +209,15 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$json->ascii(1) if $lei->{opt}->{ascii};
 	}
 	my $l2m = $lei->{l2m};
-	if ($l2m && $l2m->{-wq_s1}) {
+	if ($l2m && $ibxish->can('scheme')) { # remote https?:// mboxrd
+		delete $l2m->{-wq_s1};
+		my $g2m = $l2m->can('git_to_mail');
+		my $wcb = $l2m->write_cb($lei);
+		sub {
+			my ($smsg, undef, $eml) = @_; # no mitem in $_[1]
+			$wcb->(undef, $smsg, $eml);
+		};
+	} elsif ($l2m && $l2m->{-wq_s1}) {
 		my ($lei_ipc, @io) = $lei->atfork_parent_wq($l2m);
 		# n.b. $io[0] = qry_status_wr, $io[1] = mbox|stdout,
 		# $io[4] becomes a notification pipe that triggers EOF
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index cea68319..43c59da0 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -251,9 +251,9 @@ sub _mbox_write_cb ($$) {
 	my $dedupe = $lei->{dedupe};
 	$dedupe->prepare_dedupe;
 	sub { # for git_to_mail
-		my ($buf, $smsg) = @_;
+		my ($buf, $smsg, $eml) = @_;
 		return unless $out;
-		my $eml = PublicInbox::Eml->new($buf);
+		$eml //= PublicInbox::Eml->new($buf);
 		if (!$dedupe->is_dup($eml, $smsg->{blob})) {
 			$buf = $eml2mbox->($eml, $smsg);
 			my $lk = $ovv->lock_for_scope;
@@ -286,18 +286,23 @@ sub _augment_file { # _maildir_each_file cb
 # _maildir_each_file callback, \&CORE::unlink doesn't work with it
 sub _unlink { unlink($_[0]) }
 
+sub _rand () {
+	state $seq = 0;
+	sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq);
+}
+
 sub _buf2maildir {
 	my ($dst, $buf, $smsg) = @_;
 	my $kw = $smsg->{kw} // [];
 	my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw));
 	my $rand = ''; # chosen by die roll :P
 	my ($tmp, $fh, $final);
-	my $common = $smsg->{blob};
+	my $common = $smsg->{blob} // _rand;
 	if (defined(my $pct = $smsg->{pct})) { $common .= "=$pct" }
 	do {
 		$tmp = $dst.'tmp/'.$rand.$common;
 	} while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY) &&
-		$! == EEXIST && ($rand = int(rand 0x7fffffff).','));
+		$! == EEXIST && ($rand = _rand.','));
 	if (print $fh $$buf and close($fh)) {
 		# ignore new/ and write only to cur/, otherwise MUAs
 		# with R/W access to the Maildir will end up doing
@@ -308,7 +313,7 @@ sub _buf2maildir {
 		do {
 			$final = $dst.$rand.$common.':2,'.$sfx;
 		} while (!link($tmp, $final) && $! == EEXIST &&
-			($rand = int(rand 0x7fffffff).','));
+			($rand = _rand.','));
 		unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
 	} else {
 		my $err = $!;
@@ -323,9 +328,10 @@ sub _maildir_write_cb ($$) {
 	$dedupe->prepare_dedupe;
 	my $dst = $lei->{ovv}->{dst};
 	sub { # for git_to_mail
-		my ($buf, $smsg) = @_;
+		my ($buf, $smsg, $eml) = @_;
+		$buf //= \($eml->as_string);
 		return _buf2maildir($dst, $buf, $smsg) if !$dedupe;
-		my $eml = PublicInbox::Eml->new($$buf); # copy buf
+		$eml //= PublicInbox::Eml->new($$buf); # copy buf
 		return if $dedupe->is_dup($eml, $smsg->{blob});
 		undef $eml;
 		_buf2maildir($dst, $buf, $smsg);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 10c25246..d32fe09a 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -14,6 +14,7 @@ use PublicInbox::Import;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
 use PublicInbox::Search qw(xap_terms);
+use PublicInbox::Spawn qw(popen_rd);
 
 sub new {
 	my ($class) = @_;
@@ -169,8 +170,58 @@ sub query_mset { # non-parallel for non-"--thread" users
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
+sub each_eml { # callback for MboxReader->mboxrd
+	my ($eml, $self, $lei, $each_smsg) = @_;
+	my $smsg = bless {}, 'PublicInbox::Smsg';
+	$smsg->populate($eml);
+	$smsg->{$_} //= '' for qw(from to cc ds subject references mid);
+	delete @$smsg{qw(From Subject -ds -ts)};
+	if (my $startq = delete($self->{5})) { wait_startq($startq) }
+	return if !$lei->{l2m} && $lei->{dedupe}->is_smsg_dup($smsg);
+	$each_smsg->($smsg, undef, $eml);
+}
+
 sub query_remote_mboxrd {
 	my ($self, $lei, $uri) = @_;
+	local $0 = "$0 query_remote_mboxrd";
+	my %sig = $lei->atfork_child_wq($self); # keep $self->{5} startq
+	local @SIG{keys %sig} = values %sig;
+	my $opt = $lei->{opt};
+	$uri->query_form(q => $lei->{mset_opt}->{qstr}, x => 'm',
+			$opt->{thread} ? (t => 1) : ());
+	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $uri);
+	my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing';
+	$dedupe->prepare_dedupe;
+	my @cmd = qw(curl -XPOST -sSf);
+	my $tor = $opt->{torsocks} //= 'auto';
+	if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+			(($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) {
+		unshift @cmd, 'torsocks';
+	} elsif (PublicInbox::Config::git_bool($tor)) {
+		unshift @cmd, 'torsocks';
+	}
+	my $verbose = $opt->{verbose};
+	push @cmd, '-v' if $verbose;
+	push @cmd, $uri->as_string;
+	$lei->err("# @cmd") if $verbose;
+	$? = 0;
+	my $fh = popen_rd(\@cmd, $lei->{env}, { 2 => $lei->{2} });
+	$fh = IO::Uncompress::Gunzip->new($fh);
+	eval {
+		PublicInbox::MboxReader->mboxrd($fh, \&each_eml,
+						$self, $lei, $each_smsg);
+	};
+	return $lei->fail("E: @cmd: $@") if $@;
+	if (($? >> 8) == 22) { # HTTP 404 from curl(1)
+		$uri->query_form(q => $lei->{mset_opt}->{qstr});
+		$lei->err('# no results from '.$uri->as_string);
+	} elsif ($?) {
+		$uri->query_form(q => $lei->{mset_opt}->{qstr});
+		$lei->err('E: '.$uri->as_string);
+		$lei->child_error($?);
+	}
+	undef $each_smsg;
+	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
 sub git {
@@ -230,7 +281,6 @@ sub start_query { # always runs in main (lei-daemon) process
 	} else {
 		$self->wq_do('query_mset', $io, $lei);
 	}
-	# TODO
 	for my $uri (remotes($self)) {
 		$self->wq_do('query_remote_mboxrd', $io, $lei, $uri);
 	}
@@ -263,6 +313,7 @@ sub do_query {
 	my ($lei, @io) = $lei_orig->atfork_parent_wq($self);
 	$io[0] = undef;
 	pipe(my $done, $io[0]) or die "pipe $!";
+	$lei_orig->{1}->autoflush(1);
 
 	$lei_orig->event_step_init; # wait for shutdowns
 	my $done_op = {
@@ -296,6 +347,10 @@ sub do_query {
 
 sub ipc_atfork_prepare {
 	my ($self) = @_;
+	if (exists $self->{remotes}) {
+		require PublicInbox::MboxReader;
+		require IO::Uncompress::Gunzip;
+	}
 	# (0: done_wr, 1: stdout|mbox, 2: stderr,
 	#  3: sock, 4: $l2m->{-wq_s1}, 5: $startq)
 	$self->wq_set_recv_modes(qw[+<&= >&= >&= +<&= +<&= <&=]);
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
index e540dc22..97e9c268 100644
--- a/lib/PublicInbox/ProcessPipe.pm
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -13,6 +13,8 @@ sub TIEHANDLE {
 		$class;
 }
 
+sub BINMODE { binmode(shift->{fh}) } # for IO::Uncompress::Gunzip
+
 sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
 
 sub READLINE { readline($_[0]->{fh}) }
diff --git a/script/lei b/script/lei
index 8dcea562..8c40bf12 100755
--- a/script/lei
+++ b/script/lei
@@ -93,6 +93,8 @@ Falling back to (slow) one-shot mode
 		if ($buf =~ /\Ax_it ([0-9]+)\z/) {
 			$x_it_code = $1 + 0;
 			last;
+		} elsif ($buf =~ /\Achild_error ([0-9]+)\z/) {
+			$x_it_code = $1 + 0;
 		} elsif ($buf =~ /\Aexec (.+)\z/) {
 			exec_cmd(\@fds, split(/\0/, $1));
 		} else {
diff --git a/t/lei.t b/t/lei.t
index 50ad2bb1..6b45f5b7 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -8,11 +8,15 @@ use PublicInbox::TestCommon;
 use PublicInbox::Config;
 use File::Path qw(rmtree);
 use Fcntl qw(SEEK_SET);
+use PublicInbox::Spawn qw(which);
 require_git 2.6;
 require_mods(qw(json DBD::SQLite Search::Xapian));
 my $opt = { 1 => \(my $out = ''), 2 => \(my $err = '') };
 my ($home, $for_destroy) = tmpdir();
 my $err_filter;
+my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/
+	http://czquwvybam4bgbro.onion/meta/
+	http://ou63pmih66umazou.onion/meta/);
 my $lei = sub {
 	my ($cmd, $env, $xopt) = @_;
 	$out = $err = '';
@@ -155,6 +159,32 @@ my $setup_publicinboxes = sub {
 	$seen || BAIL_OUT 'no imports';
 };
 
+my $test_external_remote = sub {
+	my ($url, $k) = @_;
+SKIP: {
+	my $nr = 4;
+	skip "$k unset", $nr if !$url;
+	which('curl') or skip 'no curl', $nr;
+	which('torsocks') or skip 'no torsocks', $nr if $url =~ m!\.onion/!;
+	$lei->('ls-external');
+	for my $e (split(/^/ms, $out)) {
+		$e =~ s/\s+boost.*//s;
+		$lei->('forget-external', '-q', $e) or
+			fail "error forgetting $e: $err"
+	}
+	$lei->('add-external', $url);
+	my $mid = '20140421094015.GA8962@dcvr.yhbt.net';
+	ok($lei->('q', "m:$mid"), "query $url");
+	is($err, '', "no errors on $url");
+	my $res = PublicInbox::Config->json->decode($out);
+	is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url");
+	ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error');
+	like($err, qr/404/, 'noted 404');
+	is($out, "[null]\n", 'got null results');
+	$lei->('forget-external', $url);
+} # /SKIP
+}; # /sub
+
 my $test_external = sub {
 	$setup_publicinboxes->();
 	$cleanup->();
@@ -243,6 +273,15 @@ my $test_external = sub {
 	}
 	ok(!$lei->('q', '-o', "$home/mbox", 's:nope'),
 			'fails if mbox format unspecified');
+	my %e = (
+		TEST_LEI_EXTERNAL_HTTPS => 'https://public-inbox.org/meta/',
+		TEST_LEI_EXTERNAL_ONION => $onions[int(rand(scalar(@onions)))],
+	);
+	for my $k (keys %e) {
+		my $url = $ENV{$k} // '';
+		$url = $e{$k} if $url eq '1';
+		$test_external_remote->($url, $k);
+	}
 };
 
 my $test_lei_common = sub {

  parent reply	other threads:[~2021-01-23 10:27 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-23 10:27 [PATCH 00/10] lei: externals more stuff Eric Wong
2021-01-23 10:27 ` [PATCH 01/10] lei: move external vivification to xsearch Eric Wong
2021-01-23 10:27 ` Eric Wong [this message]
2021-01-24  6:01   ` [PATCH 02/10] lei: support remote externals Kyle Meyer
2021-01-24 12:02     ` Eric Wong
2021-01-24 12:12       ` Eric Wong
2021-01-24 22:11       ` Kyle Meyer
2021-01-25 18:37         ` Eric Wong
2021-01-23 10:27 ` [PATCH 03/10] lei_to_mail: drop cyclic reference if not using IPC Eric Wong
2021-01-23 10:27 ` [PATCH 04/10] lei: oneshot: preserve stdout if writing mbox Eric Wong
2021-01-23 10:27 ` [PATCH 05/10] lei: default "-f $mfolder" args for common MUAs Eric Wong
2021-01-23 10:27 ` [PATCH 06/10] lei completion: handle URLs with port numbers Eric Wong
2021-01-23 10:27 ` [PATCH 07/10] lei forget-external: just show the location Eric Wong
2021-01-23 10:27 ` [PATCH 08/10] lei q: support a bunch of curl(1) options Eric Wong
2021-01-23 10:27 ` [PATCH 09/10] lei forget-external: don't show redundant "not found" Eric Wong
2021-01-23 10:27 ` [PATCH 10/10] lei add-external: don't allow non-existent directories Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210123102755.425-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).