From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 5/8] overidx: each_by_mid: pass self and args to callbacks
Date: Fri, 17 Jul 2020 06:31:52 +0000 [thread overview]
Message-ID: <20200717063155.3734-6-e@yhbt.net> (raw)
In-Reply-To: <20200717063155.3734-1-e@yhbt.net>
This saves runtime allocations and reduces the likelihood of
memory leaks either from cycles or buggy old Perl versions.
---
lib/PublicInbox/OverIdx.pm | 99 +++++++++++++++++++++-----------------
1 file changed, 54 insertions(+), 45 deletions(-)
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index ea8da723..52f6328e 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -107,7 +107,7 @@ DELETE FROM $_ WHERE num = ?
# this includes ghosts
sub each_by_mid {
- my ($self, $mid, $cols, $cb) = @_;
+ my ($self, $mid, $cols, $cb, @arg) = @_;
my $dbh = $self->{dbh};
=over
@@ -152,27 +152,29 @@ SELECT $cols FROM over WHERE over.num = ? LIMIT 1
foreach (@$nums) {
$sth->execute($_->[0]);
my $smsg = $sth->fetchrow_hashref;
- $cb->(PublicInbox::Over::load_from_row($smsg)) or
- return;
+ $smsg = PublicInbox::Over::load_from_row($smsg);
+ $cb->($self, $smsg, @arg) or return;
}
return if $nr != $lim;
}
}
+sub _resolve_mid_to_tid {
+ my ($self, $smsg, $tid) = @_;
+ my $cur_tid = $smsg->{tid};
+ if (defined $$tid) {
+ merge_threads($self, $$tid, $cur_tid);
+ } else {
+ $$tid = $cur_tid;
+ }
+ 1;
+}
+
# this will create a ghost as necessary
sub resolve_mid_to_tid {
my ($self, $mid) = @_;
my $tid;
- each_by_mid($self, $mid, ['tid'], sub {
- my ($smsg) = @_;
- my $cur_tid = $smsg->{tid};
- if (defined $tid) {
- merge_threads($self, $tid, $cur_tid);
- } else {
- $tid = $cur_tid;
- }
- 1;
- });
+ each_by_mid($self, $mid, ['tid'], \&_resolve_mid_to_tid, \$tid);
defined $tid ? $tid : create_ghost($self, $mid);
}
@@ -271,6 +273,22 @@ sub add_overview {
add_over($self, [ @$smsg{qw(ts ds num)}, $mids, $refs, $xpath, $dd ]);
}
+sub _add_over {
+ my ($self, $smsg, $mid, $refs, $old_tid, $v) = @_;
+ my $cur_tid = $smsg->{tid};
+ my $n = $smsg->{num};
+ die "num must not be zero for $mid" if !$n;
+ $$old_tid = $cur_tid unless defined $$old_tid;
+ if ($n > 0) { # regular mail
+ merge_threads($self, $$old_tid, $cur_tid);
+ } elsif ($n < 0) { # ghost
+ link_refs($self, $refs, $$old_tid);
+ $self->delete_by_num($n);
+ $$v++;
+ }
+ 1;
+}
+
sub add_over {
my ($self, $values) = @_;
my ($ts, $ds, $num, $mids, $refs, $xpath, $ddd) = @$values;
@@ -281,21 +299,8 @@ sub add_over {
$self->delete_by_num($num, \$old_tid);
foreach my $mid (@$mids) {
my $v = 0;
- each_by_mid($self, $mid, ['tid'], sub {
- my ($cur) = @_;
- my $cur_tid = $cur->{tid};
- my $n = $cur->{num};
- die "num must not be zero for $mid" if !$n;
- $old_tid = $cur_tid unless defined $old_tid;
- if ($n > 0) { # regular mail
- merge_threads($self, $old_tid, $cur_tid);
- } elsif ($n < 0) { # ghost
- link_refs($self, $refs, $old_tid);
- $self->delete_by_num($n);
- $v++;
- }
- 1;
- });
+ each_by_mid($self, $mid, ['tid'], \&_add_over,
+ $mid, $refs, \$old_tid, \$v);
$v > 1 and warn "BUG: vivified multiple ($v) ghosts for $mid\n";
$vivified += $v;
}
@@ -320,35 +325,39 @@ INSERT INTO id2num (id, num) VALUES (?,?)
}
}
+sub _remove_oid {
+ my ($self, $smsg, $oid, $nr) = @_;
+ if (!defined($oid) || $smsg->{blob} eq $oid) {
+ $self->delete_by_num($smsg->{num});
+ $$nr++;
+ }
+ 1;
+}
+
# returns number of removed messages
# $oid may be undef to match only on $mid
sub remove_oid {
my ($self, $oid, $mid) = @_;
my $nr = 0;
$self->begin_lazy;
- each_by_mid($self, $mid, ['ddd'], sub {
- my ($smsg) = @_;
- if (!defined($oid) || $smsg->{blob} eq $oid) {
- $self->delete_by_num($smsg->{num});
- $nr++;
- }
- 1;
- });
+ each_by_mid($self, $mid, ['ddd'], \&_remove_oid, $oid, \$nr);
$nr;
}
+sub _num_mid0_for_oid {
+ my ($self, $smsg, $oid, $res) = @_;
+ my $blob = $smsg->{blob};
+ return 1 if (!defined($blob) || $blob ne $oid); # continue;
+ @$res = ($smsg->{num}, $smsg->{mid});
+ 0; # done
+}
+
sub num_mid0_for_oid {
my ($self, $oid, $mid) = @_;
- my ($num, $mid0);
+ my $res = [];
$self->begin_lazy;
- each_by_mid($self, $mid, ['ddd'], sub {
- my ($smsg) = @_;
- my $blob = $smsg->{blob};
- return 1 if (!defined($blob) || $blob ne $oid); # continue;
- ($num, $mid0) = ($smsg->{num}, $smsg->{mid});
- 0; # done
- });
- ($num, $mid0);
+ each_by_mid($self, $mid, ['ddd'], \&_num_mid0_for_oid, $oid, $res);
+ @$res, # ($num, $mid0);
}
sub create_tables {
next prev parent reply other threads:[~2020-07-17 6:31 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-17 6:31 [PATCH 0/8] indexing cleanup and code reduction Eric Wong
2020-07-17 6:31 ` [PATCH 1/8] v2: use v5.10.1, parent.pm, drop warnings Eric Wong
2020-07-17 6:31 ` [PATCH 2/8] drop binmode usage Eric Wong
2020-07-17 6:31 ` [PATCH 3/8] import: use common capitalization for filtering headers Eric Wong
2020-07-17 6:31 ` [PATCH 4/8] with_umask: pass args to callback Eric Wong
2020-07-17 6:31 ` Eric Wong [this message]
2020-07-17 6:31 ` [PATCH 6/8] overidx: favor non-OO sub dispatch for internal subs Eric Wong
2020-07-17 6:31 ` [PATCH 7/8] searchidx: use v5.10.1, parent.pm, drop warnings Eric Wong
2020-07-17 6:31 ` [PATCH 8/8] search: simplify unindexing Eric Wong
2020-07-17 7:25 ` [9/8 PATCH] v2writable: git_hash_raw: avoid $TMPDIR write Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200717063155.3734-6-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).