unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] net_reader: hoist out _imap_fetch_bodies
@ 2021-10-08 22:00 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-10-08 22:00 UTC (permalink / raw)
  To: meta

We'll be supporting pipelining in a future commit, since
Tor is too slow and increasing batch size can use too much
memory.
---
 lib/PublicInbox/NetReader.pm | 68 +++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 2b74af414215..4da19ab969b5 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -539,6 +539,41 @@ sub perm_fl_ok ($) {
 # may be overridden in NetWriter or Watch
 sub folder_select { $_[0]->{each_old} ? 'select' : 'examine' }
 
+sub _imap_fetch_bodies ($$$$) { # fetch @$uids in batches; returns ($last_uid, $err)
+	my ($self, $mic, $uri, $uids) = @_; # splice() below removes entries from the caller's @$uids
+	my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK'; # fetch item depends on $mic->imap4rev1
+	my $key = $req; # key used to look up the body in each per-UID result...
+	$key =~ s/\.PEEK//; # ...which is $req with ".PEEK" stripped
+	my $sec = uri_section($uri); # selects the per-section options in {cfg_opt}
+	my $mbx = $uri->mailbox; # only used for the $0 string below
+	my $bs = $self->{cfg_opt}->{$sec}->{batch_size} // 1; # max UIDs per fetch_hash call, 1 if unset
+	my ($last_uid, $err); # both stay undef unless assigned in the loop
+	my $use_fl = $self->{-use_fl}; # when false, undef is passed to _imap_do_msg instead of FLAGS
+
+	while (scalar @$uids) {
+		my @batch = splice(@$uids, 0, $bs); # NOTE(review): removes nothing if $bs is 0 -- confirm batch_size is validated
+		my $batch = join(',', @batch); # comma-separated UID list for this request
+		local $0 = "UID:$batch $mbx $sec"; # expose the in-flight batch via $0 for this iteration
+		my $r = $mic->fetch_hash($batch, $req, 'FLAGS'); # FLAGS is requested regardless of $use_fl
+		unless ($r) { # network error?
+			last if $!{EINTR} && $self->{quit}; # interrupted while quitting: stop without setting $err
+			$err = "E: $uri UID FETCH $batch error: $!";
+			last; # give up on the remaining UIDs
+		}
+		for my $uid (@batch) {
+			# messages get deleted, so holes appear
+			my $per_uid = delete $r->{$uid} // next; # skip UIDs absent from the result
+			my $raw = delete($per_uid->{$key}) // next; # skip entries without a body under $key
+			my $fl = $use_fl ? $per_uid->{FLAGS} : undef;
+			_imap_do_msg($self, $uri, $uid, \$raw, $fl); # body is passed by reference
+			$last_uid = $uid; # last UID handed to _imap_do_msg
+			last if $self->{quit}; # leave the per-UID loop...
+		}
+		last if $self->{quit}; # ...and the batch loop as well
+	}
+	($last_uid, $err); # $err is set only when fetch_hash failed (except EINTR while quitting)
+}
+
 sub _imap_fetch_all ($$$) {
 	my ($self, $mic, $orig_uri) = @_;
 	my $sec = uri_section($orig_uri);
@@ -586,6 +621,7 @@ EOF
 	$mic->Uid(1); # the default, we hope
 	my $err;
 	my $use_fl = perm_fl_ok($perm_fl);
+	local $self->{-use_fl} = $use_fl;
 	if (!defined($single_uid) && $self->{each_old} && $use_fl) {
 		$err = each_old_flags($self, $mic, $uri, $l_uid);
 		return $err if $err;
@@ -597,15 +633,12 @@ EOF
 		my $m = $mod ? " [(UID % $mod) == $shard]" : '';
 		warn "# $uri fetching UID $l_uid:$r_uid$m\n";
 	}
-	my $bs = $self->{cfg_opt}->{$sec}->{batch_size} // 1;
-	my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
-	my $key = $req;
-	$key =~ s/\.PEEK//;
-	my ($uids, $batch);
+	my $fetch_cb = \&_imap_fetch_bodies;
 	do {
 		# I wish "UID FETCH $START:*" could work, but:
 		# 1) servers do not need to return results in any order
 		# 2) Mail::IMAPClient doesn't offer a streaming API
+		my $uids;
 		if (defined $single_uid) {
 			$uids = [ $single_uid ];
 		} elsif (!($uids = $mic->search("UID $l_uid:*"))) {
@@ -623,31 +656,8 @@ EOF
 		return if $uids->[0] < $l_uid;
 
 		$l_uid = $uids->[-1] + 1; # for next search
-		my $last_uid;
-		my $n = $self->{max_batch};
-
 		@$uids = grep { ($_ % $mod) == $shard } @$uids if $mod;
-		while (scalar @$uids) {
-			my @batch = splice(@$uids, 0, $bs);
-			$batch = join(',', @batch);
-			local $0 = "UID:$batch $mbx $sec";
-			my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
-			unless ($r) { # network error?
-				last if $!{EINTR} && $self->{quit};
-				$err = "E: $uri UID FETCH $batch error: $!";
-				last;
-			}
-			for my $uid (@batch) {
-				# messages get deleted, so holes appear
-				my $per_uid = delete $r->{$uid} // next;
-				my $raw = delete($per_uid->{$key}) // next;
-				my $fl = $use_fl ? $per_uid->{FLAGS} : undef;
-				_imap_do_msg($self, $uri, $uid, \$raw, $fl);
-				$last_uid = $uid;
-				last if $self->{quit};
-			}
-			last if $self->{quit};
-		}
+		(my $last_uid, $err) = $fetch_cb->($self, $mic, $uri, $uids);
 		run_commit_cb($self);
 		$itrk->update_last($r_uidval, $last_uid) if $itrk;
 	} until ($err || $self->{quit} || defined($single_uid));

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2021-10-08 22:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-08 22:00 [PATCH] net_reader: hoist out _imap_fetch_bodies Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).