From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 5C5251F729; Mon, 20 Jun 2022 19:27:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1655753250; bh=JhXnkK2EwbuT3IMLJmwQXSRe+PUGVUDcfWLd1F+bRas=; h=From:To:Subject:Date:In-Reply-To:References:From; b=w1hz8CwE1SPTdtwdMJ7u8dTQiG+nYluylRD0QKqObBYIdTDTNuqJmWBrXUON0PZeK mS7Ttk3zUNSs149X3vFMOx8KgDGhQ1zp4gHTLirY4UwJ5ZBDahwbp+NIF6zMl1isfc XdPwKLS5tt0R5qXbDHLCBbcB+xv4PN5Ymhm5JrjY= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/3] search: support "patchid:" prefix (git patch-id --stable) Date: Mon, 20 Jun 2022 19:27:29 +0000 Message-Id: <20220620192730.550803-3-e@80x24.org> In-Reply-To: <20220620192730.550803-1-e@80x24.org> References: <20220620192730.550803-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This allows easy searching via patch-id from a git commit. Currently, abbreviations are not supported, and it seems needless to support them since AFAIK git neither generates nor resolves abbreviated patch-ids anywhere. 
--- TODO | 3 --- lib/PublicInbox/Search.pm | 5 +++-- lib/PublicInbox/SearchIdx.pm | 15 +++++++++++++++ t/extsearch.t | 7 ++++++- t/v2mda.t | 10 ++++++++-- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/TODO b/TODO index 5be4b5e3..43eee063 100644 --- a/TODO +++ b/TODO @@ -137,9 +137,6 @@ all need to be considered for everything we introduce) * make "git cat-file --batch" detect unlinked packfiles so we don't have to restart processes (very long-term) -* support searching based on `git-patch-id --stable` to improve - bidirectional mapping of commits <=> emails - * linter to check validity of config file * linter option and WWW endpoint to graph relationships and flows diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 523003b3..6f9fdde1 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # based on notmuch, but with no concept of folders, files or flags # @@ -118,9 +118,10 @@ my %bool_pfx_external = ( dfpre => 'XDFPRE', dfpost => 'XDFPOST', dfblob => 'XDFPRE XDFPOST', + patchid => 'XDFID', ); -my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST'; +my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST XDFID'; my %prob_prefix = ( # for mairix compatibility s => 'S', diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 50e26050..53ec23a5 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -18,6 +18,7 @@ use PublicInbox::MsgIter; use PublicInbox::IdxStack; use Carp qw(croak carp); use POSIX qw(strftime); +use Fcntl qw(SEEK_SET); use Time::Local qw(timegm); use PublicInbox::OverIdx; use PublicInbox::Spawn qw(spawn); @@ -349,6 +350,20 @@ sub index_xapian { # msg_iter callback defined $s or return; $_[0]->[0] = $part = undef; # free memory + if ($s =~ /^(?:diff|---|\+\+\+) /ms) { + open(my $fh, '+>:utf8', undef) or die 
"open: $!"; + open(my $eh, '+>', undef) or die "open: $!"; + $fh->autoflush(1); + print $fh $s or die "print: $!"; + sysseek($fh, 0, SEEK_SET) or die "sysseek: $!"; + my $id = ($self->{ibx} // $self->{eidx})->git->qx( + [qw(patch-id --stable)], + {}, { 0 => $fh, 2 => $eh }); + $id =~ /\A([a-f0-9]{40,})/ and $doc->add_term('XDFID'.$1); + seek($eh, 0, SEEK_SET) or die "seek: $!"; + while (<$eh>) { warn $_ } + } + # split off quoted and unquoted blocks: my @sections = PublicInbox::MsgIter::split_quotes($s); undef $s; # free memory diff --git a/t/extsearch.t b/t/extsearch.t index 09cbdabe..2d7375d6 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -314,7 +314,12 @@ if ('reindex catches missed messages') { is($new->{subject}, $eml->header('Subject'), 'new message added'); $es->{xdb}->reopen; - my $mset = $es->mset("mid:$new->{mid}"); + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); + + $mset = $es->mset("mid:$new->{mid}"); is($mset->size, 1, 'previously unseen, now indexed in Xapian'); ok($im->remove($eml), 'remove new message from v2 inbox'); diff --git a/t/v2mda.t b/t/v2mda.t index 3dfc569e..8f2f335d 100644 --- a/t/v2mda.t +++ b/t/v2mda.t @@ -1,7 +1,8 @@ -# Copyright (C) 2018-2021 all contributors +#!perl -w +# Copyright (C) all contributors # License: AGPL-3.0+ +use v5.10.1; use strict; -use warnings; use Test::More; use Fcntl qw(SEEK_SET); use Cwd; @@ -88,6 +89,11 @@ is($eml->as_string, $mime->as_string, 'injected message'); $pre = $ibx->search->mset_to_smsg($ibx, $pre); $post = $ibx->search->mset_to_smsg($ibx, $post); is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases'); + + # git patch-id --stable search->mset("patchid:$patchid"); + is($mset->size, 1, 'patchid search works'); } done_testing();