From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 589E61F9F4 for ; Tue, 21 Sep 2021 07:41:59 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 02/12] lei inspect: support NNTP URLs Date: Tue, 21 Sep 2021 07:41:49 +0000 Message-Id: <20210921074159.20052-3-e@80x24.org> In-Reply-To: <20210921074159.20052-1-e@80x24.org> References: <20210921074159.20052-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: No reason not to support them, since there are currently more public-inbox-nntpd instances than -imapd instances. --- lib/PublicInbox/LeiInspect.pm | 48 +++++++++++++++++++++++++++------- lib/PublicInbox/LeiMailSync.pm | 40 +++++++++++++++++++++++++++- lib/PublicInbox/TestCommon.pm | 11 ++++++-- t/lei-import-nntp.t | 21 +++++++++++++++ 4 files changed, 108 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm index 48da826b..ab2c98d9 100644 --- a/lib/PublicInbox/LeiInspect.pm +++ b/lib/PublicInbox/LeiInspect.pm @@ -11,6 +11,7 @@ use v5.10.1; use parent qw(PublicInbox::IPC); use PublicInbox::Config; use PublicInbox::MID qw(mids); +use PublicInbox::NetReader qw(imap_uri nntp_uri); sub inspect_blob ($$) { my ($lei, $oidhex) = @_; @@ -32,13 +33,33 @@ sub inspect_imap_uid ($$) { my $ent = {}; my $lms = $lei->lms or return $ent; my $oidhex = $lms->imap_oid($lei, $uid_uri); - if (ref(my $err = $oidhex)) { # art2folder error + if (ref(my $err = $oidhex)) { # arg2folder error $lei->qerr(@{$err->{qerr}}) if $err->{qerr}; } $ent->{$$uid_uri} = $oidhex; $ent; } +sub inspect_nntp_range { + my ($lei, $uri) = @_; + my ($ng, $beg, $end) = $uri->group; + $uri = $uri->clone; 
+ $uri->group($ng); + my $ent = {}; + my $ret = { "$uri" => $ent }; + my $lms = $lei->lms or return $ret; + my $err = $lms->arg2folder($lei, my $folders = [ $$uri ]); + if ($err) { + $lei->qerr(@{$err->{qerr}}) if $err->{qerr}; + } + $end //= $beg; + for my $art ($beg..$end) { + my $oidbin = $lms->imap_oidbin($folders->[0], $art); + $ent->{$art} = $oidbin ? unpack('H*', $oidbin) : undef; + } + $ret; +} + sub inspect_sync_folder ($$) { my ($lei, $folder) = @_; my $ent = {}; @@ -161,14 +182,6 @@ sub inspect1 ($$$) { my $ent; if ($item =~ /\Ablob:(.+)/) { $ent = inspect_blob($lei, $1); - } elsif ($item =~ m!\Aimaps?://!i) { - require PublicInbox::URIimap; - my $uri = PublicInbox::URIimap->new($item); - if (defined($uri->uid)) { - $ent = inspect_imap_uid($lei, $uri); - } else { - $ent = inspect_sync_folder($lei, $item); - } } elsif ($item =~ m!\A(?:maildir|mh):!i || -d $item) { $ent = inspect_sync_folder($lei, $item); } elsif ($item =~ m!\Adocid:([0-9]+)\z!) { @@ -177,6 +190,23 @@ sub inspect1 ($$$) { $ent = inspect_num($lei, $1 + 0); } elsif ($item =~ m!\A(?:mid|m):(.+)\z!) { $ent = inspect_mid($lei, $1); + } elsif (my $iuri = imap_uri($item)) { + if (defined($iuri->uid)) { + $ent = inspect_imap_uid($lei, $iuri); + } else { + $ent = inspect_sync_folder($lei, $item); + } + } elsif (my $nuri = nntp_uri($item)) { + if (defined(my $mid = $nuri->message)) { + $ent = inspect_mid($lei, $mid); + } else { + my ($group, $beg, $end) = $nuri->group; + if (defined($beg)) { + $ent = inspect_nntp_range($lei, $nuri); + } else { + $ent = inspect_sync_folder($lei, $item); + } + } } else { # TODO: more things return $lei->fail("$item not understood"); } diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index f185b585..d9b9e117 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -247,12 +247,14 @@ sub location_stats { SELECT COUNT(name) FROM blob2name WHERE fid = ? 
$ret->{'name.count'} = $row if $row; + my $ntype = ($folder =~ m!\A(?:nntps?|s?news)://!i) ? 'article' : + (($folder =~ m!\Aimaps?://!i) ? 'uid' : "TODO<$folder>"); for my $op (qw(count min max)) { ($row) = $dbh->selectrow_array(<<"", undef, $fid); SELECT $op(uid) FROM blob2num WHERE fid = ? $row or last; - $ret->{"uid.$op"} = $row; + $ret->{"$ntype.$op"} = $row; } $ret; } @@ -369,6 +371,30 @@ sub match_imap_url { "E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n"; } +sub match_nntp_url ($$$) { + my ($self, $url, $all) = @_; # $all = [ $lms->folders ]; + $all //= [ $self->folders ]; + require PublicInbox::URInntps; + my $want = PublicInbox::URInntps->new($url)->canonical; + my ($s, $h, $p) = ($want->scheme, $want->host, $want->port); + my $ng = $want->group; # force scalar (no article ranges) + my @uri = map { PublicInbox::URInntps->new($_)->canonical } + grep(m!\A\Q$s\E://.*?\Q$h\E\b.*?/\Q$ng\E\b!, @$all); + my @match; + for my $x (@uri) { + next if $x->group ne $ng || $x->host ne $h || $x->port != $p; + # maybe user was forgotten on CLI: + if (defined($x->userinfo) && !defined($want->userinfo)) { + push @match, $x; + } elsif (($x->userinfo//"\0") eq ($want->userinfo//"\0")) { + push @match, $x; + } + } + return @match if wantarray; + scalar(@match) <= 1 ? 
$match[0] : + "E: `$url' is ambiguous:\n\t".join("\n\t", @match)."\n"; +} + # returns undef on failure, number on success sub group2folders { my ($self, $lei, $all, $folders) = @_; @@ -428,6 +454,18 @@ sub arg2folder { $_ = $$res; push(@{$err->{qerr}}, <err($res) if defined $res; + push @no, $orig; + } + } elsif (m!\A(?:nntps?|s?news)://!i) { + my $orig = $_; + my $res = match_nntp_url($self, $orig, \@all); + if (ref $res) { + $_ = $$res; + push(@{$err->{qerr}}, <err($res) if defined $res; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 0ee4b228..9e152394 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -18,7 +18,7 @@ BEGIN { run_script start_script key2sub xsys xsys_e xqx eml_load tick have_xapian_compact json_utf8 setup_public_inboxes create_inbox tcp_host_port test_lei lei lei_ok $lei_out $lei_err $lei_opt - test_httpd xbail require_cmd); + test_httpd xbail require_cmd is_xdeeply); require Test::More; my @methods = grep(!/\W/, @Test::More::EXPORT); eval(join('', map { "*$_=\\&Test::More::$_;" } @methods)); @@ -520,6 +520,13 @@ sub json_utf8 () { state $x = ref(PublicInbox::Config->json)->new->utf8->canonical; } +sub is_xdeeply ($$$) { + my ($x, $y, $desc) = @_; + my $ok = is_deeply($x, $y, $desc); + diag explain([$x, '!=', $y]) if !$ok; + $ok; +} + sub test_lei { SKIP: { my ($cb) = pop @_; @@ -590,7 +597,7 @@ SKIP: { my $f = "$daemon_xrd/lei/errors.log"; open my $fh, '<', $f or BAIL_OUT "$f: $!"; my @l = <$fh>; - is_deeply(\@l, [], + is_xdeeply(\@l, [], "$t daemon XDG_RUNTIME_DIR/lei/errors.log empty"); } }; # SKIP if missing git 2.6+ || Xapian || SQLite || json diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t index df0594d4..0b080781 100644 --- a/t/lei-import-nntp.t +++ b/t/lei-import-nntp.t @@ -49,6 +49,15 @@ test_lei({ tmpdir => $tmpdir }, sub { my $end = $high - 1; lei_ok qw(import), "$url/$high"; + lei_ok('inspect', $url); is_xdeeply(json_utf8->decode($lei_out), { + $url => { 
'article.count' => 1, + 'article.min' => $high, + 'article.max' => $high, } + }, 'inspect output for URL after single message') or diag $lei_out; + lei_ok('inspect', "$url/$high"); + my $x = json_utf8->decode($lei_out); + like($x->{$url}->{$high}, qr/\A[a-f0-9]{40,}\z/, 'inspect shows blob'); + lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'article number not stored as folder'); lei_ok qw(q z:0..); my $one = json_utf8->decode($lei_out); @@ -57,6 +66,18 @@ test_lei({ tmpdir => $tmpdir }, sub { local $ENV{HOME} = "$tmpdir/h3"; lei_ok qw(import), "$url/$low-$end"; + lei_ok('inspect', $url); is_xdeeply(json_utf8->decode($lei_out), { + $url => { 'article.count' => $end - $low + 1, + 'article.min' => $low, + 'article.max' => $end, } + }, 'inspect output for URL after range') or diag $lei_out; + lei_ok('inspect', "$url/$low-$end"); + $x = json_utf8->decode($lei_out); + is_deeply([ ($low..$end) ], [ sort { $a <=> $b } keys %{$x->{$url}} ], + 'inspect range shows range'); + is(scalar(grep(/\A[a-f0-9]{40,}\z/, values %{$x->{$url}})), + $end - $low + 1, 'all values are git blobs'); + lei_ok 'ls-mail-sync'; is($lei_out, "$url\n", 'article range not stored as folder'); lei_ok qw(q z:0..); my $start = json_utf8->decode($lei_out);