From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 97ABF1F934 for ; Mon, 25 Jan 2021 01:18:57 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/5] lei: reinstate JSON smsg output deduplication Date: Sun, 24 Jan 2021 17:18:53 -0800 Message-Id: <20210125011857.563-2-e@80x24.org> In-Reply-To: <20210125011857.563-1-e@80x24.org> References: <20210125011857.563-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This was accidentally clobbered completely in ("lei q: fix JSON overview with remote externals"). There are now more tests to prevent future regressions. --- lib/PublicInbox/LeiOverview.pm | 7 ++++++- t/lei.t | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 928d66cb..880c7acc 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -203,12 +203,14 @@ sub ovv_each_smsg_cb { # runs in wq worker usually my ($self, $lei, $ibxish) = @_; my $json; $lei->{1}->autoflush(1); + my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; if (my $pkg = $self->{json}) { $json = $pkg->new; $json->utf8->canonical; $json->ascii(1) if $lei->{opt}->{ascii}; + $lei->{ovv_buf} = \(my $buf = ''); } - my $l2m = $lei->{l2m}; + my $l2m = $lei->{l2m} or $dedupe->prepare_dedupe; if ($l2m && !$ibxish) { # remote https?:// mboxrd delete $l2m->{-wq_s1}; my $g2m = $l2m->can('git_to_mail'); @@ -241,6 +243,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git $self->{git} = $git; # for ovv_atexit_child my $g2m = $l2m->can('git_to_mail'); + $dedupe->prepare_dedupe; sub { my ($smsg, $mitem) = @_; $smsg->{pct} = get_pct($mitem) if $mitem; @@ -251,6 +254,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually $lei->{ovv_buf} = \(my $buf = ''); sub { # DIY prettiness :P my ($smsg, $mitem) = @_; + return if $dedupe->is_smsg_dup($smsg); $smsg = _unbless_smsg($smsg, $mitem); $buf .= "{\n"; $buf .= join(",\n", map { @@ -274,6 +278,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually $lei->{ovv_buf} = \(my $buf = ''); sub { my ($smsg, $mitem) = @_; + return if $dedupe->is_smsg_dup($smsg); $buf .= $json->encode(_unbless_smsg(@_)) . $ORS; if (length($buf) > 65536) { my $lk = $self->lock_for_scope; diff --git a/t/lei.t b/t/lei.t index 3fd1d1fe..f826a966 100644 --- a/t/lei.t +++ b/t/lei.t @@ -17,6 +17,7 @@ my $err_filter; my @onions = qw(http://hjrcffqmbrq6wope.onion/meta/ http://czquwvybam4bgbro.onion/meta/ http://ou63pmih66umazou.onion/meta/); +my $json = ref(PublicInbox::Config->json)->new->utf8->canonical; my $lei = sub { my ($cmd, $env, $xopt) = @_; $out = $err = ''; @@ -142,8 +143,7 @@ my $setup_publicinboxes = sub { my ($ibx) = @_; my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); my $V = $ibx->version; - my @eml = glob('t/*.eml'); - push(@eml, 't/data/0001.patch') if $V == 2; + my @eml = (glob('t/*.eml'), 't/data/0001.patch'); for (@eml) { next if $_ eq 't/psgi_v2-old.eml'; # dup mid $im->add(eml_load($_)) or BAIL_OUT "v$V add $_"; @@ -176,7 +176,7 @@ SKIP: { my $mid = '20140421094015.GA8962@dcvr.yhbt.net'; ok($lei->('q', "m:$mid"), "query $url"); is($err, '', "no errors on $url"); - my $res = PublicInbox::Config->json->decode($out); + my $res = $json->decode($out); is($res->[0]->{'m'}, "<$mid>", "got expected mid from $url"); ok($lei->('q', "m:$mid", 'd:..20101002'), 'no results, no error'); like($err, qr/404/, 'noted 404'); @@ -246,6 +246,19 @@ my $test_external = sub { # No double-quoting should be imposed on users on the CLI $lei->('q', 's:use boolean prefix'); like($out, qr/search: use boolean prefix/, 'phrase search got result'); + my $res = $json->decode($out); + is(scalar(@$res), 2, 'only 2 element array (1 result)'); + is($res->[1], undef, 'final element is undef'); # XXX should this be? + is(ref($res->[0]), 'HASH', 'first element is hashref'); + $lei->('q', '--pretty', 's:use boolean prefix'); + my $pretty = $json->decode($out); + is_deeply($res, $pretty, '--pretty is identical after decode'); + + for my $fmt (qw(ldjson ndjson jsonl)) { + $lei->('q', '-f', $fmt, 's:use boolean prefix'); + is($out, $json->encode($pretty->[0])."\n", "-f $fmt"); + } + require IO::Uncompress::Gunzip; for my $sfx ('', '.gz') { my $f = "$home/mbox$sfx";