From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B9A5C1F513 for ; Thu, 24 Aug 2023 01:22:36 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1692840156; bh=xAzAXs+/0387XEclachkGp7b6UqxF0QOWRv8CoYfY04=; h=From:To:Subject:Date:In-Reply-To:References:From; b=JXeWnX4bd9xrpz6czh9zmxjMS4/X0AQgzxF25sNCG2jlBRTy+6bEUCz7XR5W5sJmx HlulbTHc+jNrwnHRfDbahY+o1OzcmbBil8V/069sb3M5c5UVWmoVHZpxOm1ONz4p3E aBDYvRG2u4X3b115cIMTz/oR97I/id8Gpd+Ut02c= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/7] cindex: add --show-roots switch Date: Thu, 24 Aug 2023 01:22:32 +0000 Message-Id: <20230824012236.3968030-4-e@80x24.org> In-Reply-To: <20230824012236.3968030-1-e@80x24.org> References: <20230824012236.3968030-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This aids in development, but I'm not sure it's going to stay or be moved into another interface. --- lib/PublicInbox/CodeSearchIdx.pm | 32 ++++++++++++++++++++++++++++++++ script/public-inbox-cindex | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index 2480dbd2..e795c2b3 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -1058,6 +1058,37 @@ sub _prep_ibx { # each_inbox callback push @{$self->{IBX}}, $ibx; } +sub show_roots { # for diagnostics + my ($self) = @_; + local $self->{xdb}; + my $cur = $self->xdb->allterms_begin('G'); + my $end = $self->{xdb}->allterms_end('G'); + my $qrepo = $PublicInbox::Search::X{Query}->new('T'.'r'); + my $enq = $PublicInbox::Search::X{Enquire}->new($self->{xdb}); + $enq->set_weighting_scheme($PublicInbox::Search::X{BoolWeight}->new); + $enq->set_docid_order($PublicInbox::Search::ENQ_ASCENDING); + for (; $cur != $end; $cur++) { + my $G_oidhex = $cur->get_termname; + my $qry = $PublicInbox::Search::X{Query}->new( + PublicInbox::Search::OP_FILTER(), + $qrepo, $G_oidhex); + $enq->set_query($qry); + my ($off, $lim) = (0, 10000); + say 'commit ',substr($G_oidhex, 1), ' appears in:'; + while (1) { + my $mset = $enq->get_mset($off, $lim); + my $size = $mset->size or last; + for my $x ($mset->items) { + my $doc = $x->get_document; + for (xap_terms('P', $x->get_document)) { + say '- /', substr($_, 1); + } + } + $off += $size; + } + } +} + sub cidx_run { # main entry point my ($self) = @_; my $restore_umask = prep_umask($self); @@ -1150,6 +1181,7 @@ sub cidx_run { # main entry point PublicInbox::DS::event_loop($MY_SIG, $SIGSET) if shards_active(); PublicInbox::DS->Reset; $self->lock_release(!!$NCHANGE); + show_roots($self) if $self->{-opt}->{'show-roots'} # for diagnostics } sub ipc_atfork_child { # @IDX_SHARDS diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex index 888c8b10..0526434c 100755 --- a/script/public-inbox-cindex +++ b/script/public-inbox-cindex @@ -29,7 +29,7 @@ GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous indexlevel|index-level|L=s associate associate-max=i associate-date-range=s associate-prefixes=s@ batch_size|batch-size=s max_size|max-size=s - include|I=s@ only=s@ all + include|I=s@ only=s@ all show-roots project-list=s exclude=s@ sort-parallel=s sort-compress-program=s sort-buffer-size=s d=s update|u scan! prune dry-run|n C=s@ help|h))