unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei import: import IMAP flag changes from old messages
Date: Sun, 30 May 2021 11:45:44 +0000	[thread overview]
Message-ID: <20210530114544.24571-1-e@80x24.org> (raw)

This makes "lei import" behavior with IMAP folders more
consistent with its behavior with Maildirs.

Opening IMAP folders read-write with "SELECT" (instead of
read-only with "EXAMINE") is necessary, since it lets an IMAP
server tell us whether or not it's worth refetching the IMAP
flags of previously imported messages.

Fetching UID+FLAGS only is one of the fastest IMAP operations
with dovecot, our -imapd and presumably other common IMAP servers.
It is issued by common MUAs such as mutt after every SELECT.

Users may now rely on "lei import" exclusively to merge mail and
keywords into lei/store, and "lei export-kw" to propagate
keyword changes back to IMAP servers.

A sticks-and-stones workflow for personal mailboxes is currently:

	lei import imaps://$MY_PERSONAL_INBOX
	lei q --mua=$MUA -o /tmp/results SEARCH TERMS...
	# do stuff from within $MUA to /tmp/results
	lei import /tmp/results # read keyword changes from MUA
	lei export-kw imaps://$MY_PERSONAL_INBOX
	# repeat when new stuff shows up in personal inbox

The next goal is to automate repeated imports + export-kw
commands with inotify and IMAP IDLE.
---
 lib/PublicInbox/LeiImport.pm   | 18 ++++++++-
 lib/PublicInbox/LeiMailSync.pm | 21 +++++++----
 lib/PublicInbox/NetReader.pm   | 68 +++++++++++++++++++++++++++++-----
 3 files changed, 88 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 01e6c93c..f9a46ec5 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -45,7 +45,16 @@ sub input_net_cb { # imap_each / nntp_each
 	my ($uri, $uid, $kw, $eml, $self) = @_;
 	my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
 	$vmd->{sync_info} = [ $$uri, $uid ] if $self->{-mail_sync};
-	$self->input_eml_cb($eml, $vmd);
+	if (defined $eml) {
+		$self->input_eml_cb($eml, $vmd);
+	} elsif ($vmd) { # old message, kw only
+		my $oid = $self->{-lms_ro}->imap_oid2($uri, $uid) // return;
+		my @docids = $self->{lse}->over->blob_exists($oid) or return;
+		my $lei = $self->{lei};
+		$lei->qerr("# $oid => @$kw\n") if $lei->{opt}->{verbose};
+		$self->{lei}->{sto}->ipc_do('set_eml_vmd', undef,
+						$vmd, \@docids);
+	}
 }
 
 sub do_import_index ($$@) {
@@ -65,7 +74,12 @@ sub do_import_index ($$@) {
 		# $j = $net->net_concurrency($j); TODO
 		if ($lei->{opt}->{incremental} // 1) {
 			$net->{incremental} = 1;
-			$net->{-lms_ro} = $lei->_lei_store->search->lms // 0;
+			$net->{-lms_ro} = $sto->search->lms // 0;
+			if ($self->{-import_kw}) {
+				$net->{each_old} = 1;
+				$self->{-lms_ro} = $net->{-lms_ro};
+				$self->{lse} = $sto->search;
+			}
 		}
 	} else {
 		my $nproc = $self->detect_nproc;
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index c7f78239..36cd564c 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -361,6 +361,17 @@ sub forget_folder {
 	$dbh->do('DELETE FROM folders WHERE fid = ?', undef, $fid);
 }
 
+sub imap_oid2 ($$$) {
+	my ($self, $uri, $uid) = @_; # $uri MUST have UIDVALIDITY
+	my $fid = $self->{fmap}->{"$uri"} //= fid_for($self, "$uri") // return;
+	my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ?
+EOM
+	$sth->execute($fid, $uid);
+	my ($oidbin) = $sth->fetchrow_array;
+	$oidbin ? unpack('H*', $oidbin) : undef;
+}
+
 sub imap_oid {
 	my ($self, $lei, $uid_uri) = @_;
 	my $mailbox_uri = $uid_uri->clone;
@@ -373,16 +384,10 @@ sub imap_oid {
 		}
 		$lei->qerr(@{$err->{qerr}}) if $err->{qerr};
 	}
-	my $fid = $self->{fmap}->{$folders->[0]} //=
-		fid_for($self, $folders->[0]) // return;
-	my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
-SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ?
-EOM
-	$sth->execute($fid, $uid_uri->uid);
-	my ($oidbin) = $sth->fetchrow_array;
-	$oidbin ? unpack('H*', $oidbin) : undef;
+	imap_oid2($self, $folders->[0], $uid_uri->uid);
 }
 
+
 # FIXME: something with "lei <up|q>" is causing uncommitted transaction
 # warnings, not sure what...
 sub DESTROY {
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 54c6b082..b97444fd 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -396,10 +396,8 @@ sub errors {
 	undef;
 }
 
-sub _imap_do_msg ($$$$$) {
-	my ($self, $uri, $uid, $raw, $flags) = @_;
-	# our target audience expects LF-only, save storage
-	$$raw =~ s/\r\n/\n/sg;
+sub flags2kw ($$$$) {
+	my ($self, $uri, $uid, $flags) = @_;
 	my $kw = [];
 	for my $f (split(/ /, $flags)) {
 		if (my $k = $IMAPflags2kw{$f}) {
@@ -412,6 +410,14 @@ sub _imap_do_msg ($$$$$) {
 		}
 	}
 	@$kw = sort @$kw; # for all UI/UX purposes
+	$kw;
+}
+
+sub _imap_do_msg ($$$$$) {
+	my ($self, $uri, $uid, $raw, $flags) = @_;
+	# our target audience expects LF-only, save storage
+	$$raw =~ s/\r\n/\n/sg;
+	my $kw = flags2kw($self, $uri, $uid, $flags) // return;
 	my ($eml_cb, @args) = @{$self->{eml_each}};
 	$eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
 }
@@ -447,17 +453,56 @@ sub itrk_last ($$;$$) {
 	($itrk, $l_uid, $l_uidval //= $r_uidval);
 }
 
+# import flags of already-seen messages
+sub each_old_flags ($$$$) {
+	my ($self, $mic, $uri, $l_uid) = @_;
+	$l_uid ||= 1;
+	my $sec = uri_section($uri);
+	my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 10000;
+	my ($eml_cb, @args) = @{$self->{eml_each}};
+	for (my $n = 1; $n <= $l_uid; $n += $bs) {
+		my $end = $n + $bs;
+		$end = $l_uid if $end > $l_uid;
+		my $r = $mic->fetch_hash("$n:$end", 'FLAGS');
+		if (!$r) {
+			return if $!{EINTR} && $self->{quit};
+			return "E: $uri UID FETCH $n:$end error: $!";
+		}
+		while (my ($uid, $per_uid) = each %$r) {
+			my $kw = flags2kw($self, $uri, $uid, $per_uid->{FLAGS})
+				// next;
+			$eml_cb->($uri, $uid, $kw, undef, @args);
+		}
+	}
+}
+
+# returns true if PERMANENTFLAGS indicates FLAGS of already imported
+# messages are meaningful
+sub perm_fl_ok ($) {
+	my ($perm_fl) = @_;
+	return if !defined($perm_fl);
+	for my $f (split(/[ \t]+/, $perm_fl)) {
+		return 1 if $IMAPflags2kw{$f};
+	}
+	undef;
+}
+
 sub _imap_fetch_all ($$$) {
 	my ($self, $mic, $orig_uri) = @_;
 	my $sec = uri_section($orig_uri);
 	my $mbx = $orig_uri->mailbox;
 	$mic->Clear(1); # trim results history
-	$mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!";
-	my ($r_uidval, $r_uidnext);
+
+	# we need to check for mailbox writability to see if we care about
+	# FLAGS from already-imported messages.
+	my $cmd = $self->{each_old} ? 'select' : 'examine';
+	$mic->$cmd($mbx) or return "E: \U$cmd\E $mbx ($sec) failed: $!";
+
+	my ($r_uidval, $r_uidnext, $perm_fl);
 	for ($mic->Results) {
+		/^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and $perm_fl = $1;
 		/^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1;
 		/^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1;
-		last if $r_uidval && $r_uidnext;
 	}
 	$r_uidval //= $mic->uidvalidity($mbx) //
 		return "E: $orig_uri cannot get UIDVALIDITY";
@@ -486,6 +531,13 @@ EOF
 E: $uri local UID exceeds remote ($l_uid > $r_uid)
 E: $uri strangely, UIDVALIDLITY matches ($l_uidval)
 EOF
+	$mic->Uid(1); # the default, we hope
+	my $err;
+	if (!defined($single_uid) && $self->{each_old} &&
+				perm_fl_ok($perm_fl)) {
+		$err = each_old_flags($self, $mic, $uri, $l_uid);
+		return $err if $err;
+	}
 	return if $l_uid >= $r_uid; # nothing to do
 	$l_uid ||= 1;
 	my ($mod, $shard) = @{$self->{shard_info} // []};
@@ -493,13 +545,11 @@ EOF
 		my $m = $mod ? " [(UID % $mod) == $shard]" : '';
 		warn "# $uri fetching UID $l_uid:$r_uid$m\n";
 	}
-	$mic->Uid(1); # the default, we hope
 	my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
 	my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
 	my $key = $req;
 	$key =~ s/\.PEEK//;
 	my ($uids, $batch);
-	my $err;
 	do {
 		# I wish "UID FETCH $START:*" could work, but:
 		# 1) servers do not need to return results in any order

                 reply	other threads:[~2021-05-30 11:45 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210530114544.24571-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).