From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A6CFC1FA01 for ; Sat, 27 Mar 2021 11:45:51 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/4] lei blob: aka "git-show-harder" for blobs Date: Sat, 27 Mar 2021 11:45:51 +0000 Message-Id: <20210327114551.3607-5-e@80x24.org> In-Reply-To: <20210327114551.3607-1-e@80x24.org> References: <20210327114551.3607-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This implements blob reconstruction via SolverGit, emulating the functionality of /$INBOX/$OID/s/ endpoint in PublicInbox::WWW. It uses the current working tree as a coderepo, and accepts any number of --git-dir=$PATH args. Remote externals are not yet supported. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 24 ++++---- lib/PublicInbox/LeiBlob.pm | 119 +++++++++++++++++++++++++++++++++++++ t/lei-import.t | 2 + t/solver_git.t | 14 ++++- 5 files changed, 146 insertions(+), 14 deletions(-) create mode 100644 lib/PublicInbox/LeiBlob.pm diff --git a/MANIFEST b/MANIFEST index 6b2b33ac..64b3626f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -181,6 +181,7 @@ lib/PublicInbox/KQNotify.pm lib/PublicInbox/LEI.pm lib/PublicInbox/LeiALE.pm lib/PublicInbox/LeiAuth.pm +lib/PublicInbox/LeiBlob.pm lib/PublicInbox/LeiConvert.pm lib/PublicInbox/LeiCurl.pm lib/PublicInbox/LeiDedupe.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index e680f5f0..478912cd 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -120,6 +120,9 @@ sub index_opt { } my @c_opt = qw(c=s@ C=s@ quiet|q); +my @lxs_opt = (qw(remote! local! external! include|I=s@ exclude=s@ only=s@ + import-remote! no-torsocks torsocks=s), + PublicInbox::LeiQuery::curl_opt()); # we generate shell completion + help using %CMD and %OPTDESC, # see lei__complete() and PublicInbox::LeiHelp @@ -127,16 +130,15 @@ my @c_opt = qw(c=s@ C=s@ quiet|q); our %CMD = ( # sorted in order of importance/use: 'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms', 'stdin|', # /|\z/ must be first for lone dash + @lxs_opt, qw(save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t+ - sort|s=s reverse|r offset=i remote! local! external! pretty - include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g augment|a - import-remote! import-before! lock=s@ rsyncable - alert=s@ mua=s no-torsocks torsocks=s verbose|v+), @c_opt, - PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ], + sort|s=s reverse|r offset=i pretty jobs|j=s globoff|g augment|a + import-before! lock=s@ rsyncable alert=s@ mua=s verbose|v+), @c_opt, + opt_dash('limit|n=i', '[0-9]+') ], -'show' => [ 'MID|OID', 'show a given object (Message-ID or object ID)', - qw(type=s solve! format|f=s dedupe|d=s threads|t remote local! - verbose|v+), @c_opt, pass_through('git show') ], +'blob' => [ 'OID', 'display a git blob object, solving if necessary', + qw(git-dir=s@ cwd! verbose|v+ oid-a|A=s path-a|a=s path-b|b=s), + @lxs_opt, @c_opt ], 'add-external' => [ 'LOCATION', 'add/set priority of a publicinbox|extindex for extra matches', @@ -350,7 +352,7 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); -my @WQ_KEYS = qw(lxs l2m imp mrr cnv p2q mark); # internal workers +my @WQ_KEYS = qw(lxs l2m imp mrr cnv p2q mark sol); # internal workers # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE sub x_it ($$) { @@ -726,10 +728,6 @@ sub _lei_store ($;$) { }; } -sub lei_show { - my ($self, @argv) = @_; -} - sub _config { my ($self, @argv) = @_; my %env = (%{$self->{env}}, GIT_CONFIG => undef); diff --git a/lib/PublicInbox/LeiBlob.pm b/lib/PublicInbox/LeiBlob.pm new file mode 100644 index 00000000..a50255aa --- /dev/null +++ b/lib/PublicInbox/LeiBlob.pm @@ -0,0 +1,119 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# "lei blob $OID" command +package PublicInbox::LeiBlob; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC); +use PublicInbox::Spawn qw(spawn popen_rd); +use PublicInbox::DS; +use PublicInbox::Eml; + +sub sol_done_wait { # dwaitpid callback + my ($arg, $pid) = @_; + my (undef, $lei) = @$arg; + $lei->child_error($?) if $?; + $lei->dclose; +} + +sub sol_done { # EOF callback for main daemon + my ($lei) = @_; + my $sol = delete $lei->{sol} or return; + $sol->wq_wait_old(\&sol_done_wait, $lei); +} + +sub get_git_dir ($) { + my ($d) = @_; + return $d if -d "$d/objects" && -d "$d/refs" && -e "$d/HEAD"; + + my $cmd = [ qw(git rev-parse --git-dir) ]; + my ($r, $pid) = popen_rd($cmd, {GIT_DIR => undef}, { '-C' => $d }); + chomp(my $gd = do { local $/; <$r> }); + waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd ($!)"; + $? == 0 ? $gd : undef; +} + +sub solver_user_cb { # called by solver when done + my ($res, $self) = @_; + my $lei = $self->{lei}; + my $log_buf = delete $lei->{'log_buf'}; + $$log_buf =~ s/^/# /sgm; + ref($res) eq 'ARRAY' or return $lei->fail($$log_buf); + $lei->qerr($$log_buf); + my ($git, $oid, $type, $size, $di) = @$res; + my $gd = $git->{git_dir}; + + # don't try to support all the git-show(1) options for non-blob, + # this is just a convenience: + $type ne 'blob' and + $lei->err("# $oid is a $type of $size bytes in:\n#\t$gd"); + + my $cmd = [ 'git', "--git-dir=$gd", 'show', $oid ]; + my $rdr = { 1 => $lei->{1}, 2 => $lei->{2} }; + waitpid(spawn($cmd, $lei->{env}, $rdr), 0); + $lei->child_error($?) if $?; +} + +sub do_solve_blob { # via wq_do + my ($self) = @_; + my $lei = $self->{lei}; + my $git_dirs = $lei->{opt}->{'git-dir'}; + my $hints = {}; + for my $x (qw(oid-a path-a path-b)) { + my $v = $lei->{opt}->{$x} // next; + $x =~ tr/-/_/; + $hints->{$x} = $v; + } + open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!"; + $lei->{log_buf} = \$log_buf; + my $git = $lei->ale->git; + my $solver = bless { + gits => [ map { PublicInbox::Git->new($_) } @$git_dirs ], + user_cb => \&solver_user_cb, + uarg => $self, + # -cur_di, -qsp, -msg => temporary fields for Qspawn callbacks + inboxes => [ $self->{lxs}->locals ], + }, 'PublicInbox::SolverGit'; + $lei->{env}->{'psgi.errors'} = $lei->{2}; # ugh... + local $PublicInbox::DS::in_loop = 0; # waitpid synchronously + $solver->solve($lei->{env}, $log, $self->{oid_b}, $hints); +} + +sub lei_blob { + my ($lei, $blob) = @_; + $lei->start_pager if -t $lei->{1}; + + # first, see if it's a blob returned by "lei q" JSON output: + my $rdr = { 1 => $lei->{1} }; + open $rdr->{2}, '>', '/dev/null' or die "open: $!"; + my $cmd = [ 'git', '--git-dir='.$lei->ale->git->{git_dir}, + 'cat-file', 'blob', $blob ]; + waitpid(spawn($cmd, $lei->{env}, $rdr), 0); + return if $? == 0; + + # maybe it's a non-email (code) blob from a coderepo + my $git_dirs = $lei->{opt}->{'git-dir'} //= []; + if ($lei->{opt}->{'cwd'} //= 1) { + my $cgd = get_git_dir('.'); + unshift(@$git_dirs, $cgd) if defined $cgd; + } + my $lxs = $lei->lxs_prepare or return; + require PublicInbox::SolverGit; + my $self = bless { lxs => $lxs, oid_b => $blob }, __PACKAGE__; + my $op = $lei->workers_start($self, 'lei_solve', 1, + { '' => [ \&sol_done, $lei ] }); + $lei->{sol} = $self; + $self->wq_io_do('do_solve_blob', []); + $self->wq_close(1); + while ($op && $op->{sock}) { $op->event_step } +} + +sub ipc_atfork_child { + my ($self) = @_; + $self->{lei}->_lei_atfork_child; + $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); + $self->SUPER::ipc_atfork_child; +} + +1; diff --git a/t/lei-import.t b/t/lei-import.t index fa40ad01..33ce490d 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -54,6 +54,8 @@ is($res->[0]->{'m'}, 'x@y', 'got expected message'); is($res->[0]->{kw}, undef, 'Status ignored for eml'); lei_ok(qw(q -f mboxrd m:x@y)); unlike($lei_out, qr/^Status:/, 'no Status: in imported message'); +lei_ok('blob', $res->[0]->{blob}); +is($lei_out, "From: a\@b\nMessage-ID: \n", 'got blob back'); $eml->header_set('Message-ID', ''); diff --git a/t/solver_git.t b/t/solver_git.t index 99ffb9e3..22714ae5 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -6,6 +6,7 @@ use v5.10.1; use PublicInbox::TestCommon; use Cwd qw(abs_path); require_git(2.6); +use Digest::SHA qw(sha1_hex); use PublicInbox::Spawn qw(popen_rd); require_mods(qw(DBD::SQLite Search::Xapian Plack::Util)); my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)}); @@ -27,6 +28,18 @@ my $ibx = create_inbox 'v2', version => 2, }; my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d'; my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); +my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; + +test_lei({tmpdir => $tmpdir}, sub { + lei_ok('blob', '69df7d5', '-I', $ibx->{inboxdir}); + is(sha1_hex("blob ".length($lei_out)."\0".$lei_out), + $expect, 'blob contents output'); + + # fallbacks + lei_ok('blob', $v1_0_0_tag, '-I', $ibx->{inboxdir}); + lei_ok('blob', $v1_0_0_tag_short, '-I', $ibx->{inboxdir}); +}); + my $git = PublicInbox::Git->new($git_dir); $ibx->{-repo_objs} = [ $git ]; my $res; @@ -38,7 +51,6 @@ $solver->solve($psgi_env, $log, '69df7d5', {}); ok($res, 'solved a blob!'); my $wt_git = $res->[0]; is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob'); -my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; is($res->[1], $expect, 'resolved blob to unabbreviated identifier'); is($res->[2], 'blob', 'type specified'); is($res->[3], 4405, 'size returned');