From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <e@80x24.org> X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 1D8A51F461 for <meta@public-inbox.org>; Fri, 8 Dec 2023 03:54:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1702007679; bh=PMskJRNA5lVvyZz+TXG5/mQ8fwqR51zl+dgy3VzVKYQ=; h=From:To:Subject:Date:In-Reply-To:References:From; b=l+mcoCnO9zdZ42axYceoyg8j7bfjMH6FZiLD8IrnYl2n63LrAn63Ub0qmvjuOGq2/ 1I6PQzhE726lqWHZ8tjeqwyTRn+1jDPGTV9cyhNc1TMVHmVsbaFZgq1peCx/wALl6F JSpeAmert0CxkuhFxVUyHrMOsSa/kpSeyEbTrkVI= From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 2/6] *search: favor wantarray form of xap_terms Date: Fri, 8 Dec 2023 03:54:34 +0000 Message-ID: <20231208035438.3710696-3-e@80x24.org> In-Reply-To: <20231208035438.3710696-1-e@80x24.org> References: <20231208035438.3710696-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: <meta.public-inbox.org> Most xap_terms callers do not benefit from the hashref return value, and we can delay hashmap use until List::Util::uniqstr if needed. --- lib/PublicInbox/CodeSearch.pm | 15 ++++++--------- lib/PublicInbox/LeiSearch.pm | 17 +++++++---------- lib/PublicInbox/LeiStore.pm | 13 +++++++------ 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm index 3092718d..48697cdc 100644 --- a/lib/PublicInbox/CodeSearch.pm +++ b/lib/PublicInbox/CodeSearch.pm @@ -9,6 +9,7 @@ use v5.12; use parent qw(PublicInbox::Search); use PublicInbox::Config; use PublicInbox::Search qw(retry_reopen int_val xap_terms); +use PublicInbox::Compat qw(uniqstr); use Compress::Zlib qw(uncompress); use constant { AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail) @@ -199,12 +200,11 @@ sub roots2paths { # for diagnostics do { my $mset = $enq->get_mset($off += $size, $lim); for my $x ($mset->items) { - my $tmp = xap_terms('P', $x->get_document); - push @$dirs, keys %$tmp; + push @$dirs, xap_terms('P', $x->get_document); } $size = $mset->size; } while ($size); - @$dirs = sort @$dirs; + @$dirs = sort(uniqstr(@$dirs)); } \%ret; } @@ -223,12 +223,9 @@ sub root_oids ($$) { my @ids = docids_of_git_dir $self, $git_dir or warn <<""; BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir} - my %ret; - for my $docid (@ids) { - my @oids = xap_terms('G', $self->xdb, $docid); - @ret{@oids} = @oids; - } - sort keys %ret; + my @ret = map { xap_terms('G', $self->xdb, $_) } @ids; + @ret = uniqstr(@ret) if @ids > 1; + @ret; } sub paths2roots { diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index ba4c4309..29e3213f 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -9,6 +9,7 @@ use parent qw(PublicInbox::ExtSearch); # PublicInbox::Search->reopen use PublicInbox::Search qw(xap_terms); use PublicInbox::ContentHash qw(content_digest content_hash git_sha); use PublicInbox::MID qw(mids mids_for_index); +use PublicInbox::Compat qw(uniqstr); use Carp qw(croak); sub _msg_kw { # retry_reopen callback @@ -44,20 +45,16 @@ sub oidbin_keywords { sub _xsmsg_vmd { # retry_reopen my ($self, $smsg, $want_label) = @_; my $xdb = $self->xdb; # set {nshard}; - my (%kw, %L, $doc, $x); - $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged}); + my (@kw, @L, $doc, $x); + @kw = qw(flagged) if delete($smsg->{lei_q_tt_flagged}); my @num = $self->over->blob_exists($smsg->{blob}); for my $num (@num) { # there should only be one... $doc = $xdb->get_document($self->num2docid($num)); - $x = xap_terms('K', $doc); - %kw = (%kw, %$x); - if ($want_label) { # JSON/JMAP only - $x = xap_terms('L', $doc); - %L = (%L, %$x); - } + push @kw, xap_terms('K', $doc); + push @L, xap_terms('L', $doc) if $want_label # JSON/JMAP only } - $smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw)); - $smsg->{L} = [ sort keys %L ] if scalar(keys(%L)); + @{$smsg->{kw}} = sort(uniqstr(@kw)) if @kw; + @{$smsg->{L}} = uniqstr(@L) if @L; } # lookup keywords+labels for external messages diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index aebb85a9..a752174d 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -27,6 +27,7 @@ use PublicInbox::MDA; use PublicInbox::Spawn qw(spawn); use PublicInbox::MdirReader; use PublicInbox::LeiToMail; +use PublicInbox::Compat qw(uniqstr); use File::Temp qw(tmpnam); use POSIX (); use IO::Handle (); # ->autoflush @@ -341,15 +342,15 @@ sub _add_vmd ($$$$) { sub _docids_and_maybe_kw ($$) { my ($self, $docids) = @_; return $docids unless wantarray; - my $kw = {}; + my (@kw, $idx, @tmp); for my $num (@$docids) { # likely only 1, unless ContentHash changes # can't use ->search->msg_keywords on uncommitted docs - my $idx = $self->{priv_eidx}->idx_shard($num); - my $tmp = eval { $idx->ipc_do('get_terms', 'K', $num) }; - if ($@) { warn "#$num get_terms: $@" } - else { @$kw{keys %$tmp} = values(%$tmp) }; + $idx = $self->{priv_eidx}->idx_shard($num); + @tmp = eval { $idx->ipc_do('get_terms', 'K', $num) }; + $@ ? warn("#$num get_terms: $@") : push(@kw, @tmp); } - ($docids, [ sort keys %$kw ]); + @kw = sort(uniqstr(@kw)) if @$docids > 1; + ($docids, \@kw); } sub _reindex_1 { # git->cat_async callback