From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_SBL,URIBL_SBL_A shortcircuit=no autolearn=no autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DB28B1FC97 for ; Sat, 6 Feb 2021 12:18:45 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 13/17] lei: add-external --mirror support Date: Sat, 6 Feb 2021 12:18:40 +0000 Message-Id: <20210206121844.10979-14-e@80x24.org> In-Reply-To: <20210206121844.10979-1-e@80x24.org> References: <20210206121844.10979-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This can be useful for users who want to clone and mirror an existing public-inbox. This doesn't have update support, yet, so users will need to run "git fetch && public-inbox-index" for now. --- MANIFEST | 3 + contrib/completion/lei-completion.bash | 2 +- lib/PublicInbox/Admin.pm | 7 +- lib/PublicInbox/LEI.pm | 17 +- lib/PublicInbox/LeiCurl.pm | 65 ++++++ lib/PublicInbox/LeiExternal.pm | 28 ++- lib/PublicInbox/LeiMirror.pm | 288 +++++++++++++++++++++++++ lib/PublicInbox/LeiXSearch.pm | 33 +-- lib/PublicInbox/TestCommon.pm | 5 +- t/lei-mirror.t | 24 +++ 10 files changed, 427 insertions(+), 45 deletions(-) create mode 100644 lib/PublicInbox/LeiCurl.pm create mode 100644 lib/PublicInbox/LeiMirror.pm create mode 100644 t/lei-mirror.t diff --git a/MANIFEST b/MANIFEST index 52dea385..4236f87c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -177,9 +177,11 @@ lib/PublicInbox/InputPipe.pm lib/PublicInbox/Isearch.pm lib/PublicInbox/KQNotify.pm lib/PublicInbox/LEI.pm +lib/PublicInbox/LeiCurl.pm lib/PublicInbox/LeiDedupe.pm lib/PublicInbox/LeiExternal.pm lib/PublicInbox/LeiImport.pm +lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiQuery.pm lib/PublicInbox/LeiSearch.pm @@ -357,6 +359,7 @@ t/kqnotify.t t/lei-daemon.t t/lei-externals.t t/lei-import.t +t/lei-mirror.t t/lei.t t/lei_dedupe.t t/lei_external.t diff --git a/contrib/completion/lei-completion.bash b/contrib/completion/lei-completion.bash index fbda474c..619805fb 100644 --- a/contrib/completion/lei-completion.bash +++ b/contrib/completion/lei-completion.bash @@ -5,7 +5,7 @@ # Needs a lot of work, see `lei__complete' in lib/PublicInbox::LEI.pm _lei() { case ${COMP_WORDS[@]} in - *' add-external http'*) + *' add-external h'* | *' --mirror h'*) compopt -o nospace ;; *) compopt +o nospace ;; # the default diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 3b38a5a3..b21fb241 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -273,8 +273,8 @@ EOM $idx->{nidx} // 0; # returns number processed } -sub progress_prepare ($) { - my ($opt) = @_; +sub progress_prepare ($;$) { + my ($opt, $dst) = @_; # public-inbox-index defaults to quiet, -xcpdb and -compact do not if (defined($opt->{quiet}) && $opt->{quiet} < 0) { @@ -286,7 +286,8 @@ sub progress_prepare ($) { $opt->{1} = $null; # suitable for spawn() redirect } else { $opt->{verbose} ||= 1; - $opt->{-progress} = sub { print STDERR @_ }; + $dst //= *STDERR{GLOB}; + $opt->{-progress} = sub { print $dst @_ }; } } diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 28ad88e7..bdeab7e3 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -98,6 +98,13 @@ sub _config_path ($) { .'/lei/config'); } +sub index_opt { + # TODO: drop underscore variants everywhere, they're undocumented + qw(fsync|sync! jobs|j=i indexlevel|index-level|L=s compact+ + max_size|max-size=s sequential_shard|sequential-shard + batch_size|batch-size=s skip-docdata quiet|q verbose|v+) +} + # TODO: generate shell completion + help using %CMD and %OPTDESC # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: @@ -105,7 +112,7 @@ our %CMD = ( # sorted in order of importance/use: save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a sort|s=s reverse|r offset=i remote! local! external! pretty include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin| - mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q + mua-cmd|mua=s no-torsocks torsocks=s verbose|v+ quiet|q received-after=s received-before=s sent-after=s sent-since=s), PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], @@ -115,7 +122,8 @@ our %CMD = ( # sorted in order of importance/use: 'add-external' => [ 'URL_OR_PATHNAME', 'add/set priority of a publicinbox|extindex for extra matches', - qw(boost=i quiet|q) ], + qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i), + index_opt(), PublicInbox::LeiQuery::curl_opt() ], 'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations', qw(format|f=s z|0 local remote quiet|q) ], 'forget-external' => [ 'URL_OR_PATHNAME...|--prune', @@ -204,7 +212,7 @@ my %OPTDESC = ( 'help|h' => 'show this built-in help', 'quiet|q' => 'be quiet', 'globoff|g' => "do not match locations using '*?' wildcards and '[]' ranges", -'verbose|v' => 'be more verbose', +'verbose|v+' => 'be more verbose', 'solve!' => 'do not attempt to reconstruct blobs from emails', 'torsocks=s' => ['auto|no|yes', 'whether or not to wrap git and curl commands with torsocks'], @@ -286,7 +294,7 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); -my @WQ_KEYS = qw(lxs l2m imp); # internal workers +my @WQ_KEYS = qw(lxs l2m imp mrr); # internal workers # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE sub x_it ($$) { @@ -714,6 +722,7 @@ sub lei__complete { } puts $self, grep(/$re/, map { # generate short/long names if (s/[:=].+\z//) { # req/optional args, e.g output|o=i + } elsif (s/\+\z//) { # verbose|v+ } elsif (s/!\z//) { # negation: solve! => no-solve|solve s/([\w\-]+)/$1|no-$1/g diff --git a/lib/PublicInbox/LeiCurl.pm b/lib/PublicInbox/LeiCurl.pm new file mode 100644 index 00000000..c8747d4f --- /dev/null +++ b/lib/PublicInbox/LeiCurl.pm @@ -0,0 +1,65 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# common option and torsocks(1) wrapping for curl(1) +package PublicInbox::LeiCurl; +use strict; +use v5.10.1; +use PublicInbox::Spawn qw(which); +use PublicInbox::Config; + +# prepares a common command for curl(1) based on $lei command +sub new { + my ($cls, $lei, $curl) = @_; + $curl //= which('curl') // return $lei->fail('curl not found'); + my $opt = $lei->{opt}; + my @cmd = ($curl, qw(-Sf)); + $cmd[-1] .= 's' if $opt->{quiet}; # already the default for "lei q" + $cmd[-1] .= 'v' if $opt->{verbose}; # we use ourselves, too + for my $o ($lei->curl_opt) { + $o =~ s/\|[a-z0-9]\b//i; # remove single char short option + if ($o =~ s/=[is]@\z//) { + my $ary = $opt->{$o} or next; + push @cmd, map { ("--$o", $_) } @$ary; + } elsif ($o =~ s/=[is]\z//) { + my $val = $opt->{$o} // next; + push @cmd, "--$o", $val; + } elsif ($opt->{$o}) { + push @cmd, "--$o"; + } + } + push @cmd, '-v' if $opt->{verbose}; # lei uses this itself + bless \@cmd, $cls; +} + +sub torsocks { # useful for "git clone" and "git fetch", too + my ($self, $lei, $uri)= @_; + my $opt = $lei->{opt}; + $opt->{torsocks} = 'false' if $opt->{'no-torsocks'}; + my $torsocks = $opt->{torsocks} //= 'auto'; + if ($torsocks eq 'auto' && substr($uri->host, -6) eq '.onion' && + (($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) { + # "auto" continues anyways if torsocks is missing; + # a proxy may be specified via CLI, curlrc, + # environment variable, or even firewall rule + [ ($lei->{torsocks} //= which('torsocks')) // () ] + } elsif (PublicInbox::Config::git_bool($torsocks)) { + my $x = $lei->{torsocks} //= which('torsocks'); + $x or return $lei->fail(<path, -3) eq '.gz' ? () : '--compressed', + $uri->as_string ] +} + +1; diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index accacf1a..6a5c2517 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -88,19 +88,35 @@ sub get_externals { (); } -sub lei_add_external { +sub add_external_finish { my ($self, $location) = @_; my $cfg = $self->_lei_cfg(1); my $new_boost = $self->{opt}->{boost} // 0; - $location = ext_canonicalize($location); - if ($location !~ m!\Ahttps?://! && !-d $location) { - return $self->fail("$location not a directory"); - } my $key = "external.$location.boost"; my $cur_boost = $cfg->{$key}; return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent $self->lei_config($key, $new_boost); - $self->_lei_store(1)->done; # just create the store +} + +sub lei_add_external { + my ($self, $location) = @_; + $self->_lei_store(1)->write_prepare($self); + my $new_boost = $self->{opt}->{boost} // 0; + $location = ext_canonicalize($location); + my $mirror = $self->{opt}->{mirror}; + if (defined($mirror) && -d $location) { + $self->fail(<<""); # TODO: did you mean "update-external?" +--mirror destination `$location' already exists + + } + if ($location !~ m!\Ahttps?://! && !-d $location) { + $mirror // return $self->fail("$location not a directory"); + $mirror = ext_canonicalize($mirror); + require PublicInbox::LeiMirror; + PublicInbox::LeiMirror->start($self, $mirror => $location); + } else { + add_external_finish($self, $location); + } } sub lei_forget_external { diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm new file mode 100644 index 00000000..bb172e6a --- /dev/null +++ b/lib/PublicInbox/LeiMirror.pm @@ -0,0 +1,288 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# "lei add-external --mirror" support +package PublicInbox::LeiMirror; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC); +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use PublicInbox::Spawn qw(popen_rd spawn); +use PublicInbox::PktOp; + +sub mirror_done { # EOF callback for main daemon + my ($lei) = @_; + my $mrr = delete $lei->{mrr}; + $mrr->wq_wait_old($lei) if $mrr; + # FIXME: check $? before finish + $lei->add_external_finish($mrr->{dst}); + $lei->dclose; +} + +# for old installations without manifest.js.gz +sub try_scrape { + my ($self) = @_; + my $uri = URI->new($self->{src}); + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $cmd = $curl->for_uri($lei, $uri); + my $opt = { 0 => $lei->{0}, 2 => $lei->{2} }; + my $fh = popen_rd($cmd, $lei->{env}, $opt); + my $html = do { local $/; <$fh> } // die "read(curl $uri): $!"; + close($fh) or return $lei->child_error($?, "@$cmd failed"); + + # we grep with URL below, we don't want Subject/From headers + # making us clone random URLs + my @urls = ($html =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); + my $url = $uri->as_string; + chop($url) eq '/' or die "BUG: $uri not canonicalized"; + + # since this is for old instances w/o manifest.js.gz, try v1 first + return clone_v1($self) if grep(m!\A\Q$url\E/*\z!, @urls); + if (my @v2_urls = grep(m!\A\Q$url\E/[0-9]+\z!, @urls)) { + my %v2_uris = map { $_ => URI->new($_) } @v2_urls; # uniq + return clone_v2($self, [ values %v2_uris ]); + } + + # filter out common URLs served by WWW (e.g /$MSGID/T/) + if (@urls && $url =~ s!/+[^/]+\@[^/]+/.*\z!! && + grep(m!\A\Q$url\E/*\z!, @urls)) { + die <<""; +E: confused by scraping <$uri>, did you mean <$url>? + + } + @urls and die <<""; +E: confused by scraping <$uri>, got ambiguous results: +@urls + + die "E: scraping <$uri> revealed nothing\n"; +} + +sub clone_cmd { + my ($lei) = @_; + my @cmd = qw(git); + # we support "-c $key=$val" for arbitrary git config options + # e.g.: git -c http.proxy=socks5h://127.0.0.1:9050 + push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []}; + push @cmd, qw(clone --mirror); + push @cmd, '-q' if $lei->{opt}->{quiet}; + push @cmd, '-v' if $lei->{opt}->{verbose}; + # XXX any other options to support? + # --reference is tricky with multiple epochs... + @cmd; +} + +# tries the relatively new /$INBOX/_/text/config/raw endpoint +sub _try_config { + my ($self) = @_; + my $dst = $self->{dst}; + if (!-d $dst || !mkdir($dst)) { + require File::Path; + File::Path::mkpath($dst); + -d $dst or die "mkpath($dst): $!\n"; + } + my $uri = URI->new($self->{src}); + my $lei = $self->{lei}; + my $path = $uri->path; + chop($path) eq '/' or die "BUG: $uri not canonicalized"; + $uri->path($path . '/_/text/config/raw'); + my $cmd = $self->{curl}->for_uri($lei, $uri); + push @$cmd, '--compressed'; # curl decompresses for us + my $ce = "$dst/inbox.config.example"; + my $f = "$ce-$$.tmp"; + open(my $fh, '+>', $f) or return $lei->err("open $f: $! (non-fatal)"); + my $opt = { 0 => $lei->{0}, 1 => $fh, 2 => $lei->{2} }; + $lei->qerr("# @$cmd"); + my $pid = spawn($cmd, $lei->{env}, $opt); + waitpid($pid, 0) == $pid or return $lei->err("waitpid @$cmd: $!"); + if (($? >> 8) == 22) { # 404 missing + unlink($f) if -s $fh == 0; + return; + } + return $lei->err("# @$cmd failed (non-fatal)") if $?; + rename($f, $ce) or return $lei->err("link($f, $ce): $! (non-fatal)"); + my $cfg = PublicInbox::Config::git_config_dump($f); + my $ibx = $self->{ibx} = {}; + for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) { + for (qw(address newsgroup nntpmirror)) { + $ibx->{$_} = $cfg->{"$sec.$_"}; + } + } +} + +sub index_cloned_inbox { + my ($self, $iv) = @_; + my $ibx = delete($self->{ibx}) // { + address => [ 'lei@example.com' ], + version => $iv, + }; + $ibx->{inboxdir} = $self->{dst}; + PublicInbox::Inbox->new($ibx); + PublicInbox::InboxWritable->new($ibx); + my $opt = {}; + my $lei = $self->{lei}; + for my $sw ($lei->index_opt) { + my ($k) = ($sw =~ /\A([\w-]+)/); + $opt->{$k} = $lei->{opt}->{$k}; + } + # force synchronous dwaitpid for v2: + local $PublicInbox::DS::in_loop = 0; + my $cfg = PublicInbox::Config->new; + my $env = PublicInbox::Admin::index_prepare($opt, $cfg); + local %ENV = (%ENV, %$env) if $env; + PublicInbox::Admin::progress_prepare($opt, $lei->{2}); + PublicInbox::Admin::index_inbox($ibx, undef, $opt); +} + +sub clone_v1 { + my ($self) = @_; + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $uri = URI->new($self->{src}); + my $pfx = $curl->torsocks($lei, $uri) or return; + my $cmd = [ @$pfx, clone_cmd($lei), $uri->as_string, $self->{dst} ]; + $lei->qerr("# @$cmd"); + my $pid = spawn($cmd, $lei->{env}, $lei); + waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!"; + $? == 0 or return $lei->child_error($?, "@$cmd failed"); + _try_config($self); + index_cloned_inbox($self, 1); +} + +sub clone_v2 { + my ($self, $v2_uris) = @_; + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $pfx //= $curl->torsocks($lei, $v2_uris->[0]) or return; + my @epochs; + my $dst = $self->{dst}; + my @src_edst; + for my $uri (@$v2_uris) { + my $src = $uri->as_string; + my $edst = $dst; + $src =~ m!/([0-9]+)(?:\.git)?\z! or die <<""; +failed to extract epoch number from $src + + my $nr = $1 + 0; + $edst .= "/git/$nr.git"; + push @src_edst, [ $src, $edst ]; + } + my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock'; + _try_config($self); + my $on_destroy = $lk->lock_for_scope($$); + my @cmd = clone_cmd($lei); + while (my $pair = shift(@src_edst)) { + my $cmd = [ @$pfx, @cmd, @$pair ]; + $lei->qerr("# @$cmd"); + my $pid = spawn($cmd, $lei->{env}, $lei); + waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!"; + $? == 0 or return $lei->child_error($?, "@$cmd failed"); + } + undef $on_destroy; # unlock + index_cloned_inbox($self, 2); +} + +sub try_manifest { + my ($self) = @_; + my $uri = URI->new($self->{src}); + my $lei = $self->{lei}; + my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; + my $path = $uri->path; + chop($path) eq '/' or die "BUG: $uri not canonicalized"; + $uri->path($path . '/manifest.js.gz'); + my $cmd = $curl->for_uri($lei, $uri); + $lei->qerr("# @$cmd"); + my $opt = { 0 => $lei->{0}, 2 => $lei->{2} }; + my $fh = popen_rd($cmd, $lei->{env}, $opt); + my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!"; + unless (close $fh) { + return try_scrape($self) if ($? >> 8) == 22; # 404 missing + return $lei->child_error($?, "@$cmd failed"); + } + my $js; + gunzip(\$gz => \$js, MultiStream => 1) or + die "gunzip($uri): $GunzipError"; + my $m = eval { PublicInbox::Config->json->decode($js) }; + die "$uri: error decoding `$js': $@" if $@; + ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m); + + my $v1_bare = $m->{$path}; + my @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m); + if (@v2_epochs) { + # It may be possible to have v1 + v2 in parallel someday: + $lei->err(<path($_); $uri->clone } @v2_epochs; + clone_v2($self, \@v2_epochs); + } elsif ($v1_bare) { + clone_v1($self); + } elsif (my @maybe = grep(m!\Q$path\E!, keys %$m)) { + die "E: confused by <$uri>, possible matches:\n@maybe"; + } else { + die "E: confused by <$uri>"; + } +} + +sub start_clone_url { + my ($self) = @_; + return try_manifest($self) if $self->{src} =~ m!\Ahttps?://!; + die "TODO: non-HTTP/HTTPS clone of $self->{src} not supported, yet"; +} + +sub do_mirror { # via wq_do + my ($self) = @_; + my $lei = $self->{lei}; + eval { + my $iv = $lei->{opt}->{'inbox-version'}; + if (defined $iv) { + return clone_v1($self) if $iv == 1; + return try_scrape($self) if $iv == 2; + die "bad --inbox-version=$iv\n"; + } + return start_clone_url($self) if $self->{src} =~ m!://!; + die "TODO: cloning local directories not supported, yet"; + }; + return $lei->fail($@) if $@; + $lei->qerr("# mirrored $self->{src} => $self->{dst}"); +} + +sub start { + my ($cls, $lei, $src, $dst) = @_; + my $self = bless { lei => $lei, src => $src, dst => $dst }, $cls; + $lei->{mrr} = $self; + if ($src =~ m!https?://!) { + require URI; + require PublicInbox::LeiCurl; + } + require PublicInbox::Lock; + require PublicInbox::Inbox; + require PublicInbox::Admin; + require PublicInbox::InboxWritable; + my $ops = { + '!' => [ $lei->can('fail_handler'), $lei ], + 'x_it' => [ $lei->can('x_it'), $lei ], + 'child_error' => [ $lei->can('child_error'), $lei ], + '' => [ \&mirror_done, $lei ], + }; + ($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops); + $self->wq_workers_start('lei_mirror', 1, $lei->oldset, {lei => $lei}); + my $op = delete $lei->{pkt_op_c}; + delete $lei->{pkt_op_p}; + $self->wq_do('do_mirror', []); + $self->wq_close(1); + $lei->event_step_init; # wait for shutdowns + if ($lei->{oneshot}) { + while ($op->{sock}) { $op->event_step } + } +} + +sub ipc_atfork_child { + my ($self) = @_; + $self->{lei}->lei_atfork_child; + $self->SUPER::ipc_atfork_child; +} + +1; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index f8068362..1e5d7ca6 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -212,7 +212,6 @@ sub query_remote_mboxrd { my ($opt, $env) = @$lei{qw(opt env)}; my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm'); push(@qform, t => 1) if $opt->{thread}; - my @cmd = ($self->{curl}, qw(-sSf -d), ''); my $verbose = $opt->{verbose}; my $reap; my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1); @@ -223,43 +222,18 @@ sub query_remote_mboxrd { # spawn a process to force line-buffering, otherwise curl # will write 1 character at-a-time and parallel outputs # mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss - push @cmd, '-v'; my $o = { 1 => $lei->{2}, 2 => $lei->{2} }; my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o); $reap = PublicInbox::OnDestroy->new(\&kill_reap, $pid); } - for my $o ($lei->curl_opt) { - $o =~ s/\|[a-z0-9]\b//i; # remove single char short option - if ($o =~ s/=[is]@\z//) { - my $ary = $opt->{$o} or next; - push @cmd, map { ("--$o", $_) } @$ary; - } elsif ($o =~ s/=[is]\z//) { - my $val = $opt->{$o} // next; - push @cmd, "--$o", $val; - } elsif ($opt->{$o}) { - push @cmd, "--$o"; - } - } - $opt->{torsocks} = 'false' if $opt->{'no-torsocks'}; - my $tor = $opt->{torsocks} //= 'auto'; + my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return; + push @$curl, '-s', '-d', ''; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei); for my $uri (@$uris) { $lei->{-current_url} = $uri->as_string; $lei->{-nr_remote_eml} = 0; $uri->query_form(@qform); - my $cmd = [ @cmd, $uri->as_string ]; - if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' && - (($env->{LD_PRELOAD}//'') !~ /torsocks/)) { - unshift @$cmd, which('torsocks'); - } elsif (PublicInbox::Config::git_bool($tor)) { - unshift @$cmd, which('torsocks'); - } - - # continue anyways if torsocks is missing; a proxy may be - # specified via CLI, curlrc, environment variable, or even - # firewall rule - shift(@$cmd) if !$cmd->[0]; - + my $cmd = $curl->for_uri($lei, $uri); $lei->err("# @$cmd") if $verbose; my ($fh, $pid) = popen_rd($cmd, $env, $rdr); $fh = IO::Uncompress::Gunzip->new($fh); @@ -440,6 +414,7 @@ sub add_uri { if (my $curl = $self->{curl} //= which('curl') // 0) { require PublicInbox::MboxReader; require IO::Uncompress::Gunzip; + require PublicInbox::LeiCurl; push @{$self->{remotes}}, $uri; } else { warn "curl missing, ignoring $uri\n"; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index c861dc5d..5cce44e4 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -461,8 +461,9 @@ SKIP: { Socket::MsgHdr missing or Inline::C is unconfigured/missing EOM $lei_opt = { 1 => \$lei_out, 2 => \$lei_err }; - my $daemon_pid; - my ($tmpdir, $for_destroy) = tmpdir(); + my ($daemon_pid, $for_destroy); + my $tmpdir = $test_opt->{tmpdir}; + ($tmpdir, $for_destroy) = tmpdir unless $tmpdir; SKIP: { skip 'TEST_LEI_ONESHOT set', 1 if $ENV{TEST_LEI_ONESHOT}; my $home = "$tmpdir/lei-daemon"; diff --git a/t/lei-mirror.t b/t/lei-mirror.t new file mode 100644 index 00000000..cf34c7ae --- /dev/null +++ b/t/lei-mirror.t @@ -0,0 +1,24 @@ +#!perl -w +# Copyright (C) 2020-2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +my $sock = tcp_server(); +my ($tmpdir, $for_destroy) = tmpdir(); +my $http = 'http://'.$sock->sockhost.':'.$sock->sockport.'/'; +my ($ro_home, $cfg_path) = setup_public_inboxes; +my $cmd = [ qw(-httpd -W0), "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ]; +my $td = start_script($cmd, { PI_CONFIG => $cfg_path }, { 3 => $sock }); +test_lei({ tmpdir => $tmpdir }, sub { + my $home = $ENV{HOME}; + my $t1 = "$home/t1-mirror"; + ok($lei->('add-external', $t1, '--mirror', "$http/t1/"), '--mirror v1'); + ok(-f "$t1/public-inbox/msgmap.sqlite3", 't1-mirror indexed'); + my $t2 = "$home/t2-mirror"; + ok($lei->('add-external', $t2, '--mirror', "$http/t2/"), '--mirror v2'); + ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed'); +}); + +ok($td->kill, 'killed -httpd'); +$td->join; + +done_testing;