From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0E6A91FC11 for ; Sun, 28 Mar 2021 09:01:26 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 10/12] lei blob: add remote external support Date: Sun, 28 Mar 2021 09:01:22 +0000 Message-Id: <20210328090124.3541-11-e@80x24.org> In-Reply-To: <20210328090124.3541-1-e@80x24.org> References: <20210328090124.3541-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Introduce a new LeiRemote wrapper to provide an internal API which SolverGit expects. This lets us use HTTP/HTTPS endpoints to reconstruct blobs off patches as we would with local endpoints, just more slowly... --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 2 +- lib/PublicInbox/LeiBlob.pm | 16 +++++-- lib/PublicInbox/LeiRemote.pm | 81 ++++++++++++++++++++++++++++++++++++ t/solver_git.t | 16 ++++++- 5 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 lib/PublicInbox/LeiRemote.pm diff --git a/MANIFEST b/MANIFEST index 9048b900..913ce55c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -196,6 +196,7 @@ lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiP2q.pm lib/PublicInbox/LeiQuery.pm +lib/PublicInbox/LeiRemote.pm lib/PublicInbox/LeiSearch.pm lib/PublicInbox/LeiStore.pm lib/PublicInbox/LeiToMail.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index a4f4e58c..a94941a9 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -121,7 +121,7 @@ sub index_opt { my @c_opt = qw(c=s@ C=s@ quiet|q); my @lxs_opt = (qw(remote! local! external! include|I=s@ exclude=s@ only=s@ - import-remote! no-torsocks torsocks=s), + import-remote! 
no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt()); # we generate shell completion + help using %CMD and %OPTDESC, diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm index f44d8af1..8e610efd 100644 --- a/lib/PublicInbox/LeiBlob.pm +++ b/lib/PublicInbox/LeiBlob.pm @@ -6,7 +6,7 @@ package PublicInbox::LeiBlob; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); -use PublicInbox::Spawn qw(spawn popen_rd); +use PublicInbox::Spawn qw(spawn popen_rd which); use PublicInbox::DS; sub sol_done_wait { # dwaitpid callback @@ -66,7 +66,10 @@ sub do_solve_blob { # via wq_do } open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!"; $lei->{log_buf} = \$log_buf; - my $git = $lei->ale->git; + my $git = $lei->{ale}->git; + my @rmt = map { + PublicInbox::LeiRemote->new($lei, $_) + } $self->{lxs}->remotes; my $solver = bless { gits => [ map { PublicInbox::Git->new($lei->rel2abs($_)) @@ -74,7 +77,7 @@ sub do_solve_blob { # via wq_do user_cb => \&solver_user_cb, uarg => $self, # -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks - inboxes => [ $self->{lxs}->locals ], + inboxes => [ $self->{lxs}->locals, @rmt ], }, 'PublicInbox::SolverGit'; $lei->{env}->{'psgi.errors'} = $lei->{2}; # ugh... 
local $PublicInbox::DS::in_loop = 0; # waitpid synchronously @@ -105,8 +108,15 @@ sub lei_blob { } return $lei->fail('no --git-dir to try') unless @$git_dirs; my $lxs = $lei->lxs_prepare or return; + if ($lxs->remotes) { + require PublicInbox::LeiRemote; + $lei->{curl} //= which('curl') or return + $lei->fail('curl needed for', $lxs->remotes); + $lei->_lei_store(1)->write_prepare($lei); + } require PublicInbox::SolverGit; my $self = bless { lxs => $lxs, oid_b => $blob }, __PACKAGE__; + $lei->ale; my ($op_c, $ops) = $lei->workers_start($self, 'lei_solve', 1, { '' => [ \&sol_done, $lei ] }); $lei->{sol} = $self; diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm new file mode 100644 index 00000000..399fc936 --- /dev/null +++ b/lib/PublicInbox/LeiRemote.pm @@ -0,0 +1,81 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# Make remote externals HTTP(S) inboxes behave like +# PublicInbox::Inbox and PublicInbox::Search/ExtSearch. +# This exists solely for SolverGit. It is a high-latency, +# synchronous API that is not at all fast. +package PublicInbox::LeiRemote; +use v5.10.1; +use strict; +use IO::Uncompress::Gunzip; +use PublicInbox::OnDestroy; +use PublicInbox::MboxReader; +use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::LeiCurl; +use PublicInbox::ContentHash qw(git_sha); + +sub new { + my ($cls, $lei, $uri) = @_; + bless { uri => $uri, lei => $lei }, $cls; +} + +sub isrch { $_[0] } # SolverGit expects this + +sub _each_mboxrd_eml { # callback for MboxReader->mboxrd + my ($eml, $self) = @_; + my $lei = $self->{lei}; + my $xoids = $lei->{ale}->xoids_for($eml, 1); + if ($lei->{sto} && !$xoids) { # memoize locally + $lei->{sto}->ipc_do('add_eml', $eml); + } + my $smsg = bless {}, 'PublicInbox::Smsg'; + $smsg->{blob} = $xoids ?
(keys(%$xoids))[0] + : git_sha(1, $eml)->hexdigest; + $smsg->populate($eml); + $smsg->{mid} //= '(none)'; + push @{$self->{smsg}}, $smsg; +} + +sub mset { + my ($self, $qstr, undef) = @_; # $opt ($_[2]) ignored + my $lei = $self->{lei}; + my $curl = PublicInbox::LeiCurl->new($lei, $lei->{curl}); + push @$curl, '-s', '-d', ''; + my $uri = $self->{uri}->clone; + $uri->query_form(q => $qstr, x => 'm', r => 1); # r=1: relevance + my $cmd = $curl->for_uri($self->{lei}, $uri); + $self->{lei}->qerr("# $cmd"); + my $rdr = { 2 => $lei->{2}, pgid => 0 }; + my ($fh, $pid) = popen_rd($cmd, undef, $rdr); + my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); + $self->{smsg} = []; + $fh = IO::Uncompress::Gunzip->new($fh); + PublicInbox::MboxReader->mboxrd($fh, \&_each_mboxrd_eml, $self); + my $err = waitpid($pid, 0) == $pid ? undef + : "BUG: waitpid($cmd): $!"; + @$reap = (); # cancel OnDestroy + my $wait = $self->{lei}->{sto}->ipc_do('done'); + die $err if $err; + $self; # we are the mset (and $ibx, and $self) +} + +sub size { scalar @{$_[0]->{smsg}} } # size of previous results + +sub mset_to_smsg { + my ($self, $ibx, $mset) = @_; # all 3 are $self + wantarray ? ($self->size, @{$self->{smsg}}) : $self->{smsg}; +} + +sub base_url { "$_[0]->{uri}" } + +sub smsg_eml { + my ($self, $smsg) = @_; + if (my $bref = $self->{lei}->ale->git->cat_file($smsg->{blob})) { + return PublicInbox::Eml->new($bref); + } + $self->{lei}->err("E: $self->{uri} $smsg->{blob} gone <$smsg->{mid}>"); + undef; +} + +1; diff --git a/t/solver_git.t b/t/solver_git.t index 6d4b93c7..2d803d47 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -7,7 +7,7 @@ use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); use Digest::SHA qw(sha1_hex); -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Spawn qw(popen_rd which); require_mods(qw(DBD::SQLite Search::Xapian Plack::Util)); my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)}); $? 
== 0 or plan skip_all => "$0 must be run from a git working tree"; @@ -227,8 +227,20 @@ EOF my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; my $td = start_script($cmd, $env, { 3 => $sock }); my ($h, $p) = tcp_host_port($sock); - local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; + my $url = "http://$h:$p"; + local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = $url; Plack::Test::ExternalServer::test_psgi(client => $client); + skip 'no curl', 1 unless which('curl'); + + mkdir "$tmpdir/ext" // xbail "mkdir $!"; + test_lei({tmpdir => "$tmpdir/ext"}, sub { + my $rurl = "$url/$name"; + lei_ok(qw(blob --no-mail 69df7d5 -I), $rurl); + is(sha1_hex("blob ".length($lei_out)."\0".$lei_out), + $expect, 'blob contents output'); + ok(!lei(qw(blob -I), $rurl, $non_existent), + 'non-existent blob fails'); + }); } }