From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id BA8811FC57 for ; Sat, 10 Aug 2024 09:00:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1723280413; bh=JglmMkOO54ZiJW4zMCRvKuDqGF8rw9PXbqdbsNiaj7Q=; h=From:To:Subject:Date:In-Reply-To:References:From; b=WZhDOIJxh1UbO/xh9uODqiFr22uu9HyTzxRBbndRxdMQcNb2XIE0V6zhZn4qgzhj4 cZXYZv1hVUYTWFG4ka+/fq+XynXMwLIXu3LYW0am3oBmFs3Y+7MTjcP1sjUmkQZ05x VBSO1hBivyDeCNwS/ZWcU5aLG+hFB0KnkQE10AR8= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 02/11] indexheader: deduplicate common values Date: Sat, 10 Aug 2024 09:00:03 +0000 Message-ID: <20240810090012.23269-3-e@80x24.org> In-Reply-To: <20240810090012.23269-1-e@80x24.org> References: <20240810090012.23269-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since we plan on sharing IndexHeader across multiple inboxes for large installations with thousands of inboxes, it makes sense to deduplicate the values to save some memory at the cost of increased startup time. Each value is round-tripped through the key of a temporary hash, since scalars returned by `keys' reference Perl's shared hash-key string table instead of holding a private copy of the string. 
--- lib/PublicInbox/IndexHeader.pm | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/IndexHeader.pm b/lib/PublicInbox/IndexHeader.pm index 53e9373b..07827959 100644 --- a/lib/PublicInbox/IndexHeader.pm +++ b/lib/PublicInbox/IndexHeader.pm @@ -17,7 +17,8 @@ sub extra_indexer_new_common ($$$$) { my ($self, $spec, $pfx, $query) = @_; $pfx =~ /\A[a-z][a-z0-9]*\z/ or warn "W: non-word prefix in `$spec' not searchable\n"; - $self->{prefix} = $pfx; + my %dedupe = ($pfx => undef); + ($self->{prefix}) = keys %dedupe; my %params = map { my ($k, $v) = split /=/, uri_unescape($_), 2; ($k, $v // ''); @@ -25,7 +26,8 @@ sub extra_indexer_new_common ($$$$) { my $xpfx = delete($params{index_prefix}) // "X\U$pfx"; $xpfx =~ /\A[A-Z][A-Z0-9]*\z/ or die die "E: `index_prefix' in `$spec' must be ALL CAPS\n"; - $self->{xprefix} = $xpfx; + %dedupe = ($xpfx => undef); + ($self->{xprefix}) = keys %dedupe; \%params; } @@ -34,14 +36,18 @@ sub new { my ($type, $pfx, $header, $query) = split /:/, $spec, 4; $pfx // die "E: `$spec' has no user prefix\n"; $header // die "E: `$spec' has no mail header\n"; + $T2IDX{$type} // die + "E: `$type' not supported in $spec, must be one of: ", + join(', ', sort keys %T2IDX), "\n"; + my %dedupe = ($type => undef); + ($type) = keys %dedupe; + %dedupe = ($header => undef); + ($header) = keys %dedupe; my $self = bless { header => $header, type => $type }, $cls; my $params = extra_indexer_new_common $self, $spec, $pfx, $query; $self->{hdr_method} = delete $params->{raw} ? 'header_raw' : 'header'; my @k = keys %$params; warn "W: unknown params in `$spec': ", join(', ', @k), "\n" if @k; - $T2IDX{$type} // die - "E: `$type' not supported in $spec, must be one of: ", - join(', ', sort keys %T2IDX), "\n"; $self; }