From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <e@80x24.org>
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-ASN:  
X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00,
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,
	T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no
	version=3.4.6
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id 1D8A51F461
	for <meta@public-inbox.org>; Fri,  8 Dec 2023 03:54:39 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org;
	s=selector1; t=1702007679;
	bh=PMskJRNA5lVvyZz+TXG5/mQ8fwqR51zl+dgy3VzVKYQ=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=l+mcoCnO9zdZ42axYceoyg8j7bfjMH6FZiLD8IrnYl2n63LrAn63Ub0qmvjuOGq2/
	 1I6PQzhE726lqWHZ8tjeqwyTRn+1jDPGTV9cyhNc1TMVHmVsbaFZgq1peCx/wALl6F
	 JSpeAmert0CxkuhFxVUyHrMOsSa/kpSeyEbTrkVI=
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/6] *search: favor wantarray form of xap_terms
Date: Fri,  8 Dec 2023 03:54:34 +0000
Message-ID: <20231208035438.3710696-3-e@80x24.org>
In-Reply-To: <20231208035438.3710696-1-e@80x24.org>
References: <20231208035438.3710696-1-e@80x24.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id: <meta.public-inbox.org>

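Most xap_terms callers do not benefit from the hashref return
value, so call it in list context (the wantarray form) instead.
Where duplicates are possible, deduplicate afterwards with
List::Util::uniqstr, so that a hashmap is only used when it is
actually needed.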
---
 lib/PublicInbox/CodeSearch.pm | 15 ++++++---------
 lib/PublicInbox/LeiSearch.pm  | 17 +++++++----------
 lib/PublicInbox/LeiStore.pm   | 13 +++++++------
 3 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/CodeSearch.pm b/lib/PublicInbox/CodeSearch.pm
index 3092718d..48697cdc 100644
--- a/lib/PublicInbox/CodeSearch.pm
+++ b/lib/PublicInbox/CodeSearch.pm
@@ -9,6 +9,7 @@ use v5.12;
 use parent qw(PublicInbox::Search);
 use PublicInbox::Config;
 use PublicInbox::Search qw(retry_reopen int_val xap_terms);
+use PublicInbox::Compat qw(uniqstr);
 use Compress::Zlib qw(uncompress);
 use constant {
 	AT => 0, # author time YYYYMMDDHHMMSS, dt: for mail)
@@ -199,12 +200,11 @@ sub roots2paths { # for diagnostics
 		do {
 			my $mset = $enq->get_mset($off += $size, $lim);
 			for my $x ($mset->items) {
-				my $tmp = xap_terms('P', $x->get_document);
-				push @$dirs, keys %$tmp;
+				push @$dirs, xap_terms('P', $x->get_document);
 			}
 			$size = $mset->size;
 		} while ($size);
-		@$dirs = sort @$dirs;
+		@$dirs = sort(uniqstr(@$dirs));
 	}
 	\%ret;
 }
@@ -223,12 +223,9 @@ sub root_oids ($$) {
 	my @ids = docids_of_git_dir $self, $git_dir or warn <<"";
 BUG? (non-fatal) `$git_dir' not indexed in $self->{topdir}
 
-	my %ret;
-	for my $docid (@ids) {
-		my @oids = xap_terms('G', $self->xdb, $docid);
-		@ret{@oids} = @oids;
-	}
-	sort keys %ret;
+	my @ret = map { xap_terms('G', $self->xdb, $_) } @ids;
+	@ret = uniqstr(@ret) if @ids > 1;
+	@ret;
 }
 
 sub paths2roots {
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index ba4c4309..29e3213f 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -9,6 +9,7 @@ use parent qw(PublicInbox::ExtSearch); # PublicInbox::Search->reopen
 use PublicInbox::Search qw(xap_terms);
 use PublicInbox::ContentHash qw(content_digest content_hash git_sha);
 use PublicInbox::MID qw(mids mids_for_index);
+use PublicInbox::Compat qw(uniqstr);
 use Carp qw(croak);
 
 sub _msg_kw { # retry_reopen callback
@@ -44,20 +45,16 @@ sub oidbin_keywords {
 sub _xsmsg_vmd { # retry_reopen
 	my ($self, $smsg, $want_label) = @_;
 	my $xdb = $self->xdb; # set {nshard};
-	my (%kw, %L, $doc, $x);
-	$kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged});
+	my (@kw, @L, $doc, $x);
+	@kw = qw(flagged) if delete($smsg->{lei_q_tt_flagged});
 	my @num = $self->over->blob_exists($smsg->{blob});
 	for my $num (@num) { # there should only be one...
 		$doc = $xdb->get_document($self->num2docid($num));
-		$x = xap_terms('K', $doc);
-		%kw = (%kw, %$x);
-		if ($want_label) { # JSON/JMAP only
-			$x = xap_terms('L', $doc);
-			%L = (%L, %$x);
-		}
+		push @kw, xap_terms('K', $doc);
+		push @L, xap_terms('L', $doc) if $want_label # JSON/JMAP only
 	}
-	$smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw));
-	$smsg->{L} = [ sort keys %L ] if scalar(keys(%L));
+	@{$smsg->{kw}} = sort(uniqstr(@kw)) if @kw;
+	@{$smsg->{L}} = uniqstr(@L) if @L;
 }
 
 # lookup keywords+labels for external messages
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index aebb85a9..a752174d 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -27,6 +27,7 @@ use PublicInbox::MDA;
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::MdirReader;
 use PublicInbox::LeiToMail;
+use PublicInbox::Compat qw(uniqstr);
 use File::Temp qw(tmpnam);
 use POSIX ();
 use IO::Handle (); # ->autoflush
@@ -341,15 +342,15 @@ sub _add_vmd ($$$$) {
 sub _docids_and_maybe_kw ($$) {
 	my ($self, $docids) = @_;
 	return $docids unless wantarray;
-	my $kw = {};
+	my (@kw, $idx, @tmp);
 	for my $num (@$docids) { # likely only 1, unless ContentHash changes
 		# can't use ->search->msg_keywords on uncommitted docs
-		my $idx = $self->{priv_eidx}->idx_shard($num);
-		my $tmp = eval { $idx->ipc_do('get_terms', 'K', $num) };
-		if ($@) { warn "#$num get_terms: $@" }
-		else { @$kw{keys %$tmp} = values(%$tmp) };
+		$idx = $self->{priv_eidx}->idx_shard($num);
+		@tmp = eval { $idx->ipc_do('get_terms', 'K', $num) };
+		$@ ? warn("#$num get_terms: $@") : push(@kw, @tmp);
 	}
-	($docids, [ sort keys %$kw ]);
+	@kw = sort(uniqstr(@kw)) if @$docids > 1;
+	($docids, \@kw);
 }
 
 sub _reindex_1 { # git->cat_async callback