unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei_store: keywords => vmd (volatile metadata), prepare for labels
Date: Wed, 17 Mar 2021 15:39:22 +0600	[thread overview]
Message-ID: <20210317093922.2125-1-e@80x24.org> (raw)

Since keywords and mailboxes (AKA labels) are separate things in
JMAP, and only keywords can map reliably to Maildir and mbox,
we'll keep them separate in our internal data representations,
too.

I initially wanted to call this just "meta" for "metadata", but
that might be confused with our mailing list name.  "metadata"
is also already used in Xapian's own API, which would add another
layer of confusion.

"tags" was also considered, but would probably be confusing to
notmuch users since our "labels" are analogous to "tags" in notmuch,
and notmuch doesn't seem to cover "keywords" separately...

So "vmd" it is, since we haven't used this particular
three-letter abbreviation anywhere before, and "volatile" seems
like a good description of this metadata since everything else
up to this point has been mostly WORM (write-once, read-many).
---
 Documentation/public-inbox-glossary.pod | 13 ++++-
 lib/PublicInbox/LeiImport.pm            |  6 +--
 lib/PublicInbox/LeiStore.pm             | 30 ++++++------
 lib/PublicInbox/LeiToMail.pm            |  2 +-
 lib/PublicInbox/SearchIdx.pm            | 65 ++++++++++++++++---------
 t/lei_store.t                           | 28 +++++------
 6 files changed, 85 insertions(+), 59 deletions(-)

diff --git a/Documentation/public-inbox-glossary.pod b/Documentation/public-inbox-glossary.pod
index 61e1e9f8..10b3f9d6 100644
--- a/Documentation/public-inbox-glossary.pod
+++ b/Documentation/public-inbox-glossary.pod
@@ -69,8 +69,8 @@ L<public-inbox-nntpd(1)> or L<public-inbox-imapd(1)>
 
 Private, per-message keywords or flags as described in RFC 8621
 section 10.4.  These are conveyed in the C<Status:> and
-C<X-Status:> headers for L<mbox(5)>, as IMAP FLAGS (RFC 3501 section 2.3.2),
-or Maildir info flags.
+C<X-Status:> headers for L<mbox(5)>, as system IMAP FLAGS
+(RFC 3501 section 2.3.2), or Maildir info flags.
 
 L<public-inbox-watch(1)> ignores drafts and trashed (deleted)
 messages.  L<lei-import(1)> ignores trashed (deleted) messages,
@@ -83,6 +83,15 @@ the same email into one or more virtual folders for
 ease-of-filtering.  This is NOT tied to public-inbox names, as
 messages stored by lei may not be public.
 
+These are similar in spirit to arbitrary freeform "tags"
+in mail software such as L<notmuch(1)> and non-system IMAP FLAGS.
+
+=item volatile metadata (VMD)
+
+For L<lei(1)> users only, this refers to the combination of
+keywords and labels which are subject to frequent change
+independently of immutable message content.
+
 =item IMAP INTERNALDATE, JMAP receivedAt, rt: search prefix
 
 The first valid timestamp value of Received: headers (top first).
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 65e37371..137c22fc 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,7 +12,7 @@ use PublicInbox::PktOp qw(pkt_do);
 sub _import_eml { # MboxReader callback
 	my ($eml, $sto, $set_kw) = @_;
 	$sto->ipc_do('set_eml', $eml, $set_kw ?
-		@{PublicInbox::MboxReader::mbox_keywords($eml)} : ());
+		{ kw => PublicInbox::MboxReader::mbox_keywords($eml) } : ());
 }
 
 sub import_done_wait { # dwaitpid callback
@@ -150,12 +150,12 @@ error reading $input: $!
 
 sub _import_maildir { # maildir_each_eml cb
 	my ($f, $kw, $eml, $sto, $set_kw) = @_;
-	$sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+	$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ());
 }
 
 sub _import_net { # imap_each, nntp_each cb
 	my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_;
-	$sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+	$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw } : ());
 }
 
 sub import_path_url {
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 771443db..ae263914 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -129,38 +129,38 @@ sub _docids_for ($$) {
 	sort { $a <=> $b } values %docids;
 }
 
-sub set_eml_keywords {
-	my ($self, $eml, @kw) = @_;
+sub set_eml_vmd {
+	my ($self, $eml, $vmd) = @_;
 	my $eidx = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
 	for my $docid (@docids) {
-		$eidx->idx_shard($docid)->ipc_do('set_keywords', $docid, @kw);
+		$eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
 	}
 	\@docids;
 }
 
-sub add_eml_keywords {
-	my ($self, $eml, @kw) = @_;
+sub add_eml_vmd {
+	my ($self, $eml, $vmd) = @_;
 	my $eidx = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
 	for my $docid (@docids) {
-		$eidx->idx_shard($docid)->ipc_do('add_keywords', $docid, @kw);
+		$eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd);
 	}
 	\@docids;
 }
 
-sub remove_eml_keywords {
-	my ($self, $eml, @kw) = @_;
+sub remove_eml_vmd {
+	my ($self, $eml, $vmd) = @_;
 	my $eidx = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
 	for my $docid (@docids) {
-		$eidx->idx_shard($docid)->ipc_do('remove_keywords', $docid, @kw)
+		$eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd);
 	}
 	\@docids;
 }
 
 sub add_eml {
-	my ($self, $eml, @kw) = @_;
+	my ($self, $eml, $vmd) = @_;
 	my $im = $self->importer; # may create new epoch
 	my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates
 	my $oidx = $eidx->{oidx};
@@ -174,7 +174,7 @@ sub add_eml {
 			$oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
 			# add_eidx_info for List-Id
 			$idx->ipc_do('add_eidx_info', $docid, '.', $eml);
-			$idx->ipc_do('add_keywords', $docid, @kw) if @kw;
+			$idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
 		}
 		\@docids;
 	} else {
@@ -183,14 +183,14 @@ sub add_eml {
 		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
 		my $idx = $eidx->idx_shard($smsg->{num});
 		$idx->index_eml($eml, $smsg);
-		$idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
+		$idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd;
 		$smsg;
 	}
 }
 
 sub set_eml {
-	my ($self, $eml, @kw) = @_;
-	add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw);
+	my ($self, $eml, $vmd) = @_;
+	add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd);
 }
 
 sub add_eml_maybe {
@@ -207,7 +207,7 @@ sub set_xkw {
 	if ($lxs->xids_for($eml, 1)) { # is it in a local external?
 		# TODO: index keywords only
 	} else {
-		set_eml($self, $eml, @$kw);
+		set_eml($self, $eml, { kw => $kw });
 	}
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 27e1338f..5cea73e1 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -277,7 +277,7 @@ sub update_kw_maybe ($$$$) {
 	return unless $lse;
 	my $x = $lse->kw_changed($eml, $kw);
 	if ($x) {
-		$lei->{sto}->ipc_do('set_eml', $eml, @$kw);
+		$lei->{sto}->ipc_do('set_eml', $eml, { kw => $kw });
 	} elsif (!defined($x)) {
 		$lei->{sto}->ipc_do('set_xkw', $eml, $kw);
 	}
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 772f5a64..e2a1a678 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -528,44 +528,61 @@ sub remove_eidx_info {
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
-sub set_keywords {
-	my ($self, $docid, @kw) = @_;
+my @VMD_MAP = (kw => 'K', label => 'L');
+
+sub set_vmd {
+	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
-	my %keep = map { $_ => 1 } @kw;
-	my %add = %keep;
-	my @rm;
-	my $end = $doc->termlist_end;
-	for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-		$cur->skip_to('K');
-		last if $cur == $end;
-		my $kw = $cur->get_termname;
-		$kw =~ s/\AK//s or next;
-		$keep{$kw} ? delete($add{$kw}) : push(@rm, $kw);
+	my ($end, @rm, @add);
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		my $set = $vmd->{$field} // next;
+		my %keep = map { $_ => 1 } @$set;
+		my %add = %keep;
+		$end //= $doc->termlist_end;
+		for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+			$cur->skip_to($pfx);
+			last if $cur == $end;
+			my $v = $cur->get_termname;
+			$v =~ s/\A$pfx//s or next;
+			$keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
+		}
+		push(@add, map { $pfx.$_ } keys %add);
 	}
-	return unless (scalar(@rm) + scalar(keys %add));
-	$doc->remove_term('K'.$_) for @rm;
-	$doc->add_boolean_term('K'.$_) for (keys %add);
+	return unless scalar(@rm) || scalar(@add);
+	$doc->remove_term($_) for @rm;
+	$doc->add_boolean_term($_) for @add;
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
-sub add_keywords {
-	my ($self, $docid, @kw) = @_;
+sub add_vmd {
+	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
-	$doc->add_boolean_term('K'.$_) for @kw;
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		my $add = $vmd->{$field} // next;
+		$doc->add_boolean_term($pfx . $_) for @$add;
+	}
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
-sub remove_keywords {
-	my ($self, $docid, @kw) = @_;
+sub remove_vmd {
+	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
 	my $replace;
-	eval {
-		$doc->remove_term('K'.$_);
-		$replace = 1
-	} for @kw;
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		my $rm = $vmd->{$field} // next;
+		for (@$rm) {
+			eval {
+				$doc->remove_term($pfx . $_);
+				$replace = 1;
+			};
+		}
+	}
 	$self->{xdb}->replace_document($docid, $doc) if $replace;
 }
 
diff --git a/t/lei_store.t b/t/lei_store.t
index d270e1f6..024ff527 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -36,37 +36,37 @@ $sto->done;
 
 for my $parallel (0, 1) {
 	$sto->{priv_eidx}->{parallel} = $parallel;
-	my $docids = $sto->set_eml_keywords($eml, qw(seen draft));
+	my $docids = $sto->set_eml_vmd($eml, { kw => [ qw(seen draft) ] });
 	is(scalar @$docids, 1, 'set keywords on one doc');
 	$sto->done;
 	my @kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [qw(draft seen)], 'kw matches');
 
-	$docids = $sto->add_eml_keywords($eml, qw(seen draft));
+	$docids = $sto->add_eml_vmd($eml, {kw => [qw(seen draft)]});
 	$sto->done;
 	is(scalar @$docids, 1, 'idempotently added keywords to doc');
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop');
 
-	$docids = $sto->remove_eml_keywords($eml, qw(seen draft));
+	$docids = $sto->remove_eml_vmd($eml, {kw => [qw(seen draft)]});
 	is(scalar @$docids, 1, 'removed from one doc');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [], 'kw matches after remove');
 
-	$docids = $sto->remove_eml_keywords($eml, qw(answered));
+	$docids = $sto->remove_eml_vmd($eml, {kw=> [qw(answered)]});
 	is(scalar @$docids, 1, 'removed from one doc (idempotently)');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [], 'kw matches after remove (idempotent)');
 
-	$docids = $sto->add_eml_keywords($eml, qw(answered));
+	$docids = $sto->add_eml_vmd($eml, {kw => [qw(answered)]});
 	is(scalar @$docids, 1, 'added to empty doc');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, ['answered'], 'kw matches after add');
 
-	$docids = $sto->set_eml_keywords($eml);
+	$docids = $sto->set_eml_vmd($eml, { kw => [] });
 	is(scalar @$docids, 1, 'set to clobber');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
@@ -74,11 +74,11 @@ for my $parallel (0, 1) {
 
 	my $set = eml_load('t/plack-qp.eml');
 	$set->header_set('Message-ID', "<set\@$parallel>");
-	my $ret = $sto->set_eml($set, 'seen');
+	my $ret = $sto->set_eml($set, { kw => [ 'seen' ] });
 	is(ref $ret, 'PublicInbox::Smsg', 'initial returns smsg');
-	my $ids = $sto->set_eml($set, qw(seen));
+	my $ids = $sto->set_eml($set, { kw => [ 'seen' ] });
 	is_deeply($ids, [ $ret->{num} ], 'set_eml idempotent');
-	$ids = $sto->set_eml($set, qw(seen answered));
+	$ids = $sto->set_eml($set, { kw => [ qw(seen answered) ] });
 	is_deeply($ids, [ $ret->{num} ], 'set_eml to change kw');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($ids->[0]);
@@ -91,23 +91,23 @@ SKIP: {
 	$eml->header_set('Message-ID', '<ipc-test@example>');
 	my $pid = $sto->ipc_worker_spawn('lei-store');
 	ok($pid > 0, 'got a worker');
-	my $smsg = $sto->ipc_do('set_eml', $eml, qw(seen));
+	my $smsg = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc');
-	my $ids = $sto->ipc_do('set_eml', $eml, qw(seen));
+	my $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	is_deeply($ids, [ $smsg->{num} ], 'docid returned');
 
 	$eml->header_set('Message-ID');
-	my $no_mid = $sto->ipc_do('set_eml', $eml, qw(seen));
+	my $no_mid = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	my $wait = $sto->ipc_do('done');
 	my @kw = $sto->search->msg_keywords($no_mid->{num});
 	is_deeply(\@kw, [qw(seen)], 'ipc set changed kw');
 
 	is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc');
-	$ids = $sto->ipc_do('set_eml', $eml, qw(seen));
+	$ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc');
 	$sto->ipc_do('done');
 	$sto->ipc_worker_stop;
-	$ids = $sto->ipc_do('set_eml', $eml, qw(seen answered));
+	$ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen answered) ] });
 	is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc');
 	$wait = $sto->ipc_do('done');
 

                 reply	other threads:[~2021-03-17  9:39 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210317093922.2125-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).