unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] lei_store: keywords => vmd (volatile metadata), prepare for labels
@ 2021-03-17  9:39 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-03-17  9:39 UTC (permalink / raw)
  To: meta

Since keywords and mailboxes (AKA labels) are separate things in
JMAP, and only keywords can map reliably to Maildir and mbox,
we'll keep them separate in our internal data representations,
too.

I initially wanted to call this just "meta" for "metadata", but
that might be confused with our mailing list name.  "metadata"
is already used in Xapian's own API, so reusing it here would
add another layer of confusion.

"tags" was also considered, but it would probably confuse notmuch
users since our "labels" are analogous to "tags" in notmuch,
and notmuch doesn't seem to cover "keywords" separately...

So "vmd" it is, since we haven't used this particular
three-letter abbreviation anywhere before, and "volatile" seems
like a good description of this metadata since everything else
up to this point has been mostly WORM (write-once, read-many).
---
 Documentation/public-inbox-glossary.pod | 13 ++++-
 lib/PublicInbox/LeiImport.pm            |  6 +--
 lib/PublicInbox/LeiStore.pm             | 30 ++++++------
 lib/PublicInbox/LeiToMail.pm            |  2 +-
 lib/PublicInbox/SearchIdx.pm            | 65 ++++++++++++++++---------
 t/lei_store.t                           | 28 +++++------
 6 files changed, 85 insertions(+), 59 deletions(-)

diff --git a/Documentation/public-inbox-glossary.pod b/Documentation/public-inbox-glossary.pod
index 61e1e9f8..10b3f9d6 100644
--- a/Documentation/public-inbox-glossary.pod
+++ b/Documentation/public-inbox-glossary.pod
@@ -69,8 +69,8 @@ L<public-inbox-nntpd(1)> or L<public-inbox-imapd(1)>
 
 Private, per-message keywords or flags as described in RFC 8621
 section 10.4.  These are conveyed in the C<Status:> and
-C<X-Status:> headers for L<mbox(5)>, as IMAP FLAGS (RFC 3501 section 2.3.2),
-or Maildir info flags.
+C<X-Status:> headers for L<mbox(5)>, as system IMAP FLAGS
+(RFC 3501 section 2.3.2), or Maildir info flags.
 
 L<public-inbox-watch(1)> ignores drafts and trashed (deleted)
 messages.  L<lei-import(1)> ignores trashed (deleted) messages,
@@ -83,6 +83,15 @@ the same email into one or more virtual folders for
 ease-of-filtering.  This is NOT tied to public-inbox names, as
 messages stored by lei may not be public.
 
+These are similar in spirit to arbitrary freeform "tags"
+in mail software such as L<notmuch(1)> and non-system IMAP FLAGS.
+
+=item volatile metadata (VMD)
+
+For L<lei(1)> users only, this refers to the combination of
+keywords and labels which are subject to frequent change
+independently of immutable message content.
+
 =item IMAP INTERNALDATE, JMAP receivedAt, rt: search prefix
 
 The first valid timestamp value of Received: headers (top first).
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 65e37371..137c22fc 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -12,7 +12,7 @@ use PublicInbox::PktOp qw(pkt_do);
 sub _import_eml { # MboxReader callback
 	my ($eml, $sto, $set_kw) = @_;
 	$sto->ipc_do('set_eml', $eml, $set_kw ?
-		@{PublicInbox::MboxReader::mbox_keywords($eml)} : ());
+		{ kw => PublicInbox::MboxReader::mbox_keywords($eml) } : ());
 }
 
 sub import_done_wait { # dwaitpid callback
@@ -150,12 +150,12 @@ error reading $input: $!
 
 sub _import_maildir { # maildir_each_eml cb
 	my ($f, $kw, $eml, $sto, $set_kw) = @_;
-	$sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+	$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ());
 }
 
 sub _import_net { # imap_each, nntp_each cb
 	my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_;
-	$sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ());
+	$sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw } : ());
 }
 
 sub import_path_url {
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 771443db..ae263914 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -129,38 +129,38 @@ sub _docids_for ($$) {
 	sort { $a <=> $b } values %docids;
 }
 
-sub set_eml_keywords {
-	my ($self, $eml, @kw) = @_;
+sub set_eml_vmd {
+	my ($self, $eml, $vmd) = @_;
 	my $eidx = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
 	for my $docid (@docids) {
-		$eidx->idx_shard($docid)->ipc_do('set_keywords', $docid, @kw);
+		$eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
 	}
 	\@docids;
 }
 
-sub add_eml_keywords {
-	my ($self, $eml, @kw) = @_;
+sub add_eml_vmd {
+	my ($self, $eml, $vmd) = @_;
 	my $eidx = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
 	for my $docid (@docids) {
-		$eidx->idx_shard($docid)->ipc_do('add_keywords', $docid, @kw);
+		$eidx->idx_shard($docid)->ipc_do('add_vmd', $docid, $vmd);
 	}
 	\@docids;
 }
 
-sub remove_eml_keywords {
-	my ($self, $eml, @kw) = @_;
+sub remove_eml_vmd {
+	my ($self, $eml, $vmd) = @_;
 	my $eidx = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
 	for my $docid (@docids) {
-		$eidx->idx_shard($docid)->ipc_do('remove_keywords', $docid, @kw)
+		$eidx->idx_shard($docid)->ipc_do('remove_vmd', $docid, $vmd);
 	}
 	\@docids;
 }
 
 sub add_eml {
-	my ($self, $eml, @kw) = @_;
+	my ($self, $eml, $vmd) = @_;
 	my $im = $self->importer; # may create new epoch
 	my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates
 	my $oidx = $eidx->{oidx};
@@ -174,7 +174,7 @@ sub add_eml {
 			$oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
 			# add_eidx_info for List-Id
 			$idx->ipc_do('add_eidx_info', $docid, '.', $eml);
-			$idx->ipc_do('add_keywords', $docid, @kw) if @kw;
+			$idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
 		}
 		\@docids;
 	} else {
@@ -183,14 +183,14 @@ sub add_eml {
 		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
 		my $idx = $eidx->idx_shard($smsg->{num});
 		$idx->index_eml($eml, $smsg);
-		$idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
+		$idx->ipc_do('add_vmd', $smsg->{num}, $vmd ) if $vmd;
 		$smsg;
 	}
 }
 
 sub set_eml {
-	my ($self, $eml, @kw) = @_;
-	add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw);
+	my ($self, $eml, $vmd) = @_;
+	add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd);
 }
 
 sub add_eml_maybe {
@@ -207,7 +207,7 @@ sub set_xkw {
 	if ($lxs->xids_for($eml, 1)) { # is it in a local external?
 		# TODO: index keywords only
 	} else {
-		set_eml($self, $eml, @$kw);
+		set_eml($self, $eml, { kw => $kw });
 	}
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 27e1338f..5cea73e1 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -277,7 +277,7 @@ sub update_kw_maybe ($$$$) {
 	return unless $lse;
 	my $x = $lse->kw_changed($eml, $kw);
 	if ($x) {
-		$lei->{sto}->ipc_do('set_eml', $eml, @$kw);
+		$lei->{sto}->ipc_do('set_eml', $eml, { kw => $kw });
 	} elsif (!defined($x)) {
 		$lei->{sto}->ipc_do('set_xkw', $eml, $kw);
 	}
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 772f5a64..e2a1a678 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -528,44 +528,61 @@ sub remove_eidx_info {
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
-sub set_keywords {
-	my ($self, $docid, @kw) = @_;
+my @VMD_MAP = (kw => 'K', label => 'L');
+
+sub set_vmd {
+	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
-	my %keep = map { $_ => 1 } @kw;
-	my %add = %keep;
-	my @rm;
-	my $end = $doc->termlist_end;
-	for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
-		$cur->skip_to('K');
-		last if $cur == $end;
-		my $kw = $cur->get_termname;
-		$kw =~ s/\AK//s or next;
-		$keep{$kw} ? delete($add{$kw}) : push(@rm, $kw);
+	my ($end, @rm, @add);
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		my $set = $vmd->{$field} // next;
+		my %keep = map { $_ => 1 } @$set;
+		my %add = %keep;
+		$end //= $doc->termlist_end;
+		for (my $cur = $doc->termlist_begin; $cur != $end; $cur++) {
+			$cur->skip_to($pfx);
+			last if $cur == $end;
+			my $v = $cur->get_termname;
+			$v =~ s/\A$pfx//s or next;
+			$keep{$v} ? delete($add{$v}) : push(@rm, $pfx.$v);
+		}
+		push(@add, map { $pfx.$_ } keys %add);
 	}
-	return unless (scalar(@rm) + scalar(keys %add));
-	$doc->remove_term('K'.$_) for @rm;
-	$doc->add_boolean_term('K'.$_) for (keys %add);
+	return unless scalar(@rm) || scalar(@add);
+	$doc->remove_term($_) for @rm;
+	$doc->add_boolean_term($_) for @add;
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
-sub add_keywords {
-	my ($self, $docid, @kw) = @_;
+sub add_vmd {
+	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
-	$doc->add_boolean_term('K'.$_) for @kw;
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		my $add = $vmd->{$field} // next;
+		$doc->add_boolean_term($pfx . $_) for @$add;
+	}
 	$self->{xdb}->replace_document($docid, $doc);
 }
 
-sub remove_keywords {
-	my ($self, $docid, @kw) = @_;
+sub remove_vmd {
+	my ($self, $docid, $vmd) = @_;
 	begin_txn_lazy($self);
 	my $doc = _get_doc($self, $docid) or return;
 	my $replace;
-	eval {
-		$doc->remove_term('K'.$_);
-		$replace = 1
-	} for @kw;
+	my @x = @VMD_MAP;
+	while (my ($field, $pfx) = splice(@x, 0, 2)) {
+		my $rm = $vmd->{$field} // next;
+		for (@$rm) {
+			eval {
+				$doc->remove_term($pfx . $_);
+				$replace = 1;
+			};
+		}
+	}
 	$self->{xdb}->replace_document($docid, $doc) if $replace;
 }
 
diff --git a/t/lei_store.t b/t/lei_store.t
index d270e1f6..024ff527 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -36,37 +36,37 @@ $sto->done;
 
 for my $parallel (0, 1) {
 	$sto->{priv_eidx}->{parallel} = $parallel;
-	my $docids = $sto->set_eml_keywords($eml, qw(seen draft));
+	my $docids = $sto->set_eml_vmd($eml, { kw => [ qw(seen draft) ] });
 	is(scalar @$docids, 1, 'set keywords on one doc');
 	$sto->done;
 	my @kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [qw(draft seen)], 'kw matches');
 
-	$docids = $sto->add_eml_keywords($eml, qw(seen draft));
+	$docids = $sto->add_eml_vmd($eml, {kw => [qw(seen draft)]});
 	$sto->done;
 	is(scalar @$docids, 1, 'idempotently added keywords to doc');
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop');
 
-	$docids = $sto->remove_eml_keywords($eml, qw(seen draft));
+	$docids = $sto->remove_eml_vmd($eml, {kw => [qw(seen draft)]});
 	is(scalar @$docids, 1, 'removed from one doc');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [], 'kw matches after remove');
 
-	$docids = $sto->remove_eml_keywords($eml, qw(answered));
+	$docids = $sto->remove_eml_vmd($eml, {kw=> [qw(answered)]});
 	is(scalar @$docids, 1, 'removed from one doc (idempotently)');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, [], 'kw matches after remove (idempotent)');
 
-	$docids = $sto->add_eml_keywords($eml, qw(answered));
+	$docids = $sto->add_eml_vmd($eml, {kw => [qw(answered)]});
 	is(scalar @$docids, 1, 'added to empty doc');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
 	is_deeply(\@kw, ['answered'], 'kw matches after add');
 
-	$docids = $sto->set_eml_keywords($eml);
+	$docids = $sto->set_eml_vmd($eml, { kw => [] });
 	is(scalar @$docids, 1, 'set to clobber');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($docids->[0]);
@@ -74,11 +74,11 @@ for my $parallel (0, 1) {
 
 	my $set = eml_load('t/plack-qp.eml');
 	$set->header_set('Message-ID', "<set\@$parallel>");
-	my $ret = $sto->set_eml($set, 'seen');
+	my $ret = $sto->set_eml($set, { kw => [ 'seen' ] });
 	is(ref $ret, 'PublicInbox::Smsg', 'initial returns smsg');
-	my $ids = $sto->set_eml($set, qw(seen));
+	my $ids = $sto->set_eml($set, { kw => [ 'seen' ] });
 	is_deeply($ids, [ $ret->{num} ], 'set_eml idempotent');
-	$ids = $sto->set_eml($set, qw(seen answered));
+	$ids = $sto->set_eml($set, { kw => [ qw(seen answered) ] });
 	is_deeply($ids, [ $ret->{num} ], 'set_eml to change kw');
 	$sto->done;
 	@kw = $sto->search->msg_keywords($ids->[0]);
@@ -91,23 +91,23 @@ SKIP: {
 	$eml->header_set('Message-ID', '<ipc-test@example>');
 	my $pid = $sto->ipc_worker_spawn('lei-store');
 	ok($pid > 0, 'got a worker');
-	my $smsg = $sto->ipc_do('set_eml', $eml, qw(seen));
+	my $smsg = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	is(ref($smsg), 'PublicInbox::Smsg', 'set_eml works over ipc');
-	my $ids = $sto->ipc_do('set_eml', $eml, qw(seen));
+	my $ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	is_deeply($ids, [ $smsg->{num} ], 'docid returned');
 
 	$eml->header_set('Message-ID');
-	my $no_mid = $sto->ipc_do('set_eml', $eml, qw(seen));
+	my $no_mid = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	my $wait = $sto->ipc_do('done');
 	my @kw = $sto->search->msg_keywords($no_mid->{num});
 	is_deeply(\@kw, [qw(seen)], 'ipc set changed kw');
 
 	is(ref($smsg), 'PublicInbox::Smsg', 'no mid works ipc');
-	$ids = $sto->ipc_do('set_eml', $eml, qw(seen));
+	$ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen) ] });
 	is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/ ipc');
 	$sto->ipc_do('done');
 	$sto->ipc_worker_stop;
-	$ids = $sto->ipc_do('set_eml', $eml, qw(seen answered));
+	$ids = $sto->ipc_do('set_eml', $eml, { kw => [ qw(seen answered) ] });
 	is_deeply($ids, [ $no_mid->{num} ], 'docid returned w/o mid w/o ipc');
 	$wait = $sto->ipc_do('done');
 

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-03-17  9:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-17  9:39 [PATCH] lei_store: keywords => vmd (volatile metadata), prepare for labels Eric Wong

unofficial mirror of meta@public-inbox.org

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://yhetil.org/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://yhetil.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.yhetil.org/yhetil.mail.public-inbox.meta
	nntp://news.public-inbox.org/inbox.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general


AGPL code for this site: git clone http://ou63pmih66umazou.onion/public-inbox.git