unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] lei import: support adding keywords and labels on import
@ 2021-04-23 11:22 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-04-23 11:22 UTC (permalink / raw)
  To: meta

This saves some work and makes it easier to set volatile
metadata on a message at import time.
---
 lib/PublicInbox/LeiImport.pm |  7 +++++
 lib/PublicInbox/LeiInput.pm  | 54 ++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiTag.pm    | 56 ++----------------------------------
 lib/PublicInbox/SearchIdx.pm | 41 +++++++++++++++-----------
 t/lei-import.t               | 10 +++++++
 5 files changed, 98 insertions(+), 70 deletions(-)

diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index accf08f5..e3c756e8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,6 +12,10 @@ use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml, $vmd) = @_;
 	my $xoids = $self->{lei}->{ale}->xoids_for($eml);
+	if (my $all_vmd = $self->{all_vmd}) {
+		$vmd //= {};
+		@$vmd{keys %$all_vmd} = values %$all_vmd;
+	}
 	$self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids);
 }
 
@@ -53,6 +57,9 @@ sub lei_import { # the main "lei import" method
 	$sto->write_prepare($lei);
 	my $self = bless {}, __PACKAGE__;
 	$self->{-import_kw} = $lei->{opt}->{kw} // 1;
+	my $vmd_mod = $self->vmd_mod_extract(\@inputs);
+	return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
+	$self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod;
 	$self->prepare_inputs($lei, \@inputs) or return;
 	$lei->ale; # initialize for workers to read
 	my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index e416d3ed..de60a076 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -7,6 +7,38 @@ use strict;
 use v5.10.1;
 use PublicInbox::DS;
 
+# JMAP RFC 8621 4.1.1
+# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
+our @KW = (qw(seen answered flagged draft), # widely-compatible
+	qw(forwarded), # IMAP + Maildir
+	qw(phishing junk notjunk)); # rarely supported
+
+# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
+# but is it a good idea?  Stick to the system and reserved ones, for now.
+# The widely-compatible ones map to IMAP system flags, Maildir flags
+# and mbox Status/X-Status headers.
+my %KW = map { $_ => 1 } @KW;
+my $L_MAX = 244; # Xapian term limit - length('L')
+
+# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
+# "Servers MAY reject names that violate server policy"
+my %ERR = (
+	L => sub {
+		my ($label) = @_;
+		length($label) >= $L_MAX and
+			return "`$label' too long (must be <= $L_MAX)";
+		$label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ?
+			undef : "`$label' is invalid";
+	},
+	kw => sub {
+		my ($kw) = @_;
+		$KW{$kw} ? undef : <<EOM;
+`$kw' is not one of: `seen', `flagged', `answered', `draft'
+`junk', `notjunk', `phishing' or `forwarded'
+EOM
+	}
+);
+
 sub check_input_format ($;$) {
 	my ($lei, $files) = @_;
 	my $opt_key = 'in-format';
@@ -183,4 +215,26 @@ sub input_only_atfork_child {
 	undef;
 }
 
+# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
+# for update_xvmd -> update_vmd
+sub vmd_mod_extract {
+	my $argv = $_[-1];
+	my $vmd_mod = {};
+	my @new_argv;
+	for my $x (@$argv) {
+		if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
+			my ($op, $pfx, $val) = ($1, $2, $3);
+			if (my $err = $ERR{$pfx}->($val)) {
+				push @{$vmd_mod->{err}}, $err;
+			} else { # set "+kw", "+L", "-L", "-kw"
+				push @{$vmd_mod->{$op.$pfx}}, $val;
+			}
+		} else {
+			push @new_argv, $x;
+		}
+	}
+	@$argv = @new_argv;
+	$vmd_mod;
+}
+
 1;
diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
index f019202f..f5791947 100644
--- a/lib/PublicInbox/LeiTag.pm
+++ b/lib/PublicInbox/LeiTag.pm
@@ -7,58 +7,6 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 
-# JMAP RFC 8621 4.1.1
-# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
-my @KW = (qw(seen answered flagged draft), # widely-compatible
-	qw(forwarded phishing junk notjunk)); # rarely supported
-# note: RFC 8621 states "Users may add arbitrary keywords to an Email",
-# but is it good idea?  Stick to the system and reserved ones, for now.
-# The widely-compatible ones map to IMAP system flags, Maildir flags
-# and mbox Status/X-Status headers.
-my %KW = map { $_ => 1 } @KW;
-my $L_MAX = 244; # Xapian term limit - length('L')
-
-# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name"
-# "Servers MAY reject names that violate server policy"
-my %ERR = (
-	L => sub {
-		my ($label) = @_;
-		length($label) >= $L_MAX and
-			return "`$label' too long (must be <= $L_MAX)";
-		$label =~ m{\A[a-z0-9_](?:[a-z0-9_\-\./\@,]*[a-z0-9])?\z}i ?
-			undef : "`$label' is invalid";
-	},
-	kw => sub {
-		my ($kw) = @_;
-		$KW{$kw} ? undef : <<EOM;
-`$kw' is not one of: `seen', `flagged', `answered', `draft'
-`junk', `notjunk', `phishing' or `forwarded'
-EOM
-	}
-);
-
-# like Getopt::Long, but for +kw:FOO and -kw:FOO to prepare
-# for update_xvmd -> update_vmd
-sub vmd_mod_extract {
-	my $argv = $_[-1];
-	my $vmd_mod = {};
-	my @new_argv;
-	for my $x (@$argv) {
-		if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) {
-			my ($op, $pfx, $val) = ($1, $2, $3);
-			if (my $err = $ERR{$pfx}->($val)) {
-				push @{$vmd_mod->{err}}, $err;
-			} else { # set "+kw", "+L", "-L", "-kw"
-				push @{$vmd_mod->{$op.$pfx}}, $val;
-			}
-		} else {
-			push @new_argv, $x;
-		}
-	}
-	@$argv = @new_argv;
-	$vmd_mod;
-}
-
 sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml) = @_;
 	if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) {
@@ -99,7 +47,7 @@ sub lei_tag { # the "lei tag" method
 	$sto->write_prepare($lei);
 	my $self = bless { missing => 0 }, __PACKAGE__;
 	$lei->ale; # refresh and prepare
-	my $vmd_mod = vmd_mod_extract(\@argv);
+	my $vmd_mod = $self->vmd_mod_extract(\@argv);
 	return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err};
 	$self->prepare_inputs($lei, \@argv) or return;
 	grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or
@@ -161,7 +109,7 @@ sub _complete_mark_common ($) {
 sub _complete_tag {
 	my ($self, @argv) = @_;
 	my @L = eval { $self->_lei_store->search->all_terms('L') };
-	my @all = ((map { ("+kw:$_", "-kw:$_") } @KW),
+	my @all = ((map { ("+kw:$_", "-kw:$_") } @PublicInbox::LeiInput::KW),
 		(map { ("+L:$_", "-L:$_") } @L));
 	return @all if !@argv;
 	my ($cur, $re) = _complete_mark_common(\@argv);
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ca1f3588..f066cc92 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -567,16 +567,39 @@ sub set_vmd {
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
+sub apply_vmd_mod ($$) {
+	my ($doc, $vmd_mod) = @_;
+	my $updated = 0;
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		# field: "L" or "kw"
+		for my $val (@{$vmd_mod->{"-$field"} // []}) {
+			eval {
+				$doc->remove_term($pfx . $val);
+				++$updated;
+			};
+		}
+		for my $val (@{$vmd_mod->{"+$field"} // []}) {
+			$doc->add_boolean_term($pfx . $val);
+			++$updated;
+		}
+	}
+	$updated;
+}
+
 sub add_vmd {
 	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
 	my @x = @VMD_MAP;
+	my $updated = 0;
 	while (my ($field, $pfx) = splice(@x, 0, 2)) {
 		my $add = $vmd->{$field} // next;
 		$doc->add_boolean_term($pfx . $_) for @$add;
+		$updated += scalar(@$add);
 	}
-	$self->{xdb}->replace_document($docid, $doc);
+	$updated += apply_vmd_mod($doc, $vmd);
+	$self->{xdb}->replace_document($docid, $doc) if $updated;
 }
 
 sub remove_vmd {
@@ -601,21 +624,7 @@ sub update_vmd {
 	my ($self, $docid, $vmd_mod) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
-	my $updated = 0;
-	my @x = @VMD_MAP;
-	while (my ($field, $pfx) = splice(@x, 0, 2)) {
-		# field: "label" or "kw"
-		for my $val (@{$vmd_mod->{"-$field"} // []}) {
-			eval {
-				$doc->remove_term($pfx . $val);
-				++$updated;
-			};
-		}
-		for my $val (@{$vmd_mod->{"+$field"} // []}) {
-			$doc->add_boolean_term($pfx . $val);
-			++$updated;
-		}
-	}
+	my $updated = apply_vmd_mod($doc, $vmd_mod);
 	$self->{xdb}->replace_document($docid, $doc) if $updated;
 	$updated;
 }
diff --git a/t/lei-import.t b/t/lei-import.t
index 8635df5a..6e9a853c 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -101,6 +101,16 @@ is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or
 lei_ok('blob', '--mail', $draft_b->[0]->{blob});
 is($lei_out, $eml_str, 'draft retrieved by blob');
 
+
+$eml_str = "Message-ID: <inbox\@example.com>\nSubject: label-this\n\n";
+lei_ok([qw(import -F eml - +kw:seen +L:inbox)],
+	undef, { %$lei_opt, 0 => \$eml_str });
+lei_ok(qw(q m:inbox@example.com));
+$res = json_utf8->decode($lei_out);
+is_deeply($res->[0]->{kw}, ['seen'], 'keyword set');
+is_deeply($res->[0]->{L}, ['inbox'], 'label set');
+
+
 # see t/lei_to_mail.t for "import -F mbox*"
 });
 done_testing;

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2021-04-23 11:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-23 11:22 [PATCH] lei import: support adding keywords and labels on import Eric Wong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).