From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [RFC 5/7] lei_store: local storage for Local Email Interface
Date: Tue, 15 Dec 2020 11:47:20 +0000 [thread overview]
Message-ID: <20201215114722.27400-6-e@80x24.org> (raw)
In-Reply-To: <20201215114722.27400-1-e@80x24.org>
Still unstable, this builds off the equally unstable extindex :P
This will be used for caching/memoization of traditional mail
stores (IMAP, Maildir, etc) while providing indexing via Xapian,
along with compression, and checksumming from git.
Most notably, this adds the ability to add/remove per-message
keywords (draft, seen, flagged, answered) as described in the
JMAP specification (RFC 8621 section 4.1.1).
We'll use `.' (a single period) as an $eidx_key since it's an
invalid {inboxdir} or {newsgroup} name.
---
MANIFEST | 3 +
lib/PublicInbox/ExtSearch.pm | 4 +-
lib/PublicInbox/ExtSearchIdx.pm | 35 +++++-
lib/PublicInbox/Import.pm | 4 +
lib/PublicInbox/LeiDaemon.pm | 2 +-
lib/PublicInbox/LeiSearch.pm | 40 ++++++
lib/PublicInbox/LeiStore.pm | 197 ++++++++++++++++++++++++++++++
lib/PublicInbox/OverIdx.pm | 10 ++
lib/PublicInbox/SearchIdx.pm | 47 ++++++-
lib/PublicInbox/SearchIdxShard.pm | 33 +++++
lib/PublicInbox/V2Writable.pm | 2 +-
t/lei_store.t | 74 +++++++++++
12 files changed, 441 insertions(+), 10 deletions(-)
create mode 100644 lib/PublicInbox/LeiSearch.pm
create mode 100644 lib/PublicInbox/LeiStore.pm
create mode 100644 t/lei_store.t
diff --git a/MANIFEST b/MANIFEST
index 7536b7c2..9eb97d14 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -160,6 +160,8 @@ lib/PublicInbox/InboxWritable.pm
lib/PublicInbox/Isearch.pm
lib/PublicInbox/KQNotify.pm
lib/PublicInbox/LeiDaemon.pm
+lib/PublicInbox/LeiSearch.pm
+lib/PublicInbox/LeiStore.pm
lib/PublicInbox/Linkify.pm
lib/PublicInbox/Listener.pm
lib/PublicInbox/Lock.pm
@@ -319,6 +321,7 @@ t/init.t
t/iso-2202-jp.eml
t/kqnotify.t
t/lei.t
+t/lei_store.t
t/linkify.t
t/main-bin/spamc
t/mda-mime.eml
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index 2a560935..410ae958 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -17,13 +17,13 @@ use DBI qw(:sql_types); # SQL_BLOB
use parent qw(PublicInbox::Search);
sub new {
- my (undef, $topdir) = @_;
+ my ($class, $topdir) = @_;
$topdir = File::Spec->canonpath($topdir);
bless {
topdir => $topdir,
# xpfx => 'ei15'
xpfx => "$topdir/ei".PublicInbox::Search::SCHEMA_VERSION
- }, __PACKAGE__;
+ }, $class;
}
sub misc {
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index b5024823..cdd1621d 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -812,18 +812,31 @@ sub idx_init { # similar to V2Writable
return if $self->{idx_shards};
$self->git->cleanup;
-
+ my $mode = 0644;
my $ALL = $self->git->{git_dir}; # ALL.git
- PublicInbox::Import::init_bare($ALL) unless -d $ALL;
+ my $old = -d $ALL;
+ if ($opt->{-private}) { # LeiStore
+ $mode = 0600;
+ if (!$old) {
+ umask 077; # don't bother restoring
+ PublicInbox::Import::init_bare($ALL);
+ $self->git->qx(qw(config core.sharedRepository 0600));
+ }
+ } else {
+ PublicInbox::Import::init_bare($ALL) unless $old;
+ }
my $info_dir = "$ALL/objects/info";
my $alt = "$info_dir/alternates";
- my $mode = 0644;
my (@old, @new, %seen); # seen: st_dev + st_ino
if (-e $alt) {
open(my $fh, '<', $alt) or die "open $alt: $!";
$mode = (stat($fh))[2] & 07777;
while (my $line = <$fh>) {
chomp(my $d = $line);
+
+ # expand relative path (/local/ stuff)
+ substr($d, 0, 3) eq '../' and
+ $d = "$ALL/objects/$d";
if (my @st = stat($d)) {
next if $seen{"$st[0]\0$st[1]"}++;
} else {
@@ -833,6 +846,22 @@ sub idx_init { # similar to V2Writable
push @old, $line;
}
}
+
+ # for LeiStore, and possibly some mirror-only state
+ if (opendir(my $dh, my $local = "$self->{topdir}/local")) {
+ # highest numbered epoch first
+ for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 }
+ grep(/\A[0-9]+\.git\z/, readdir($dh))) {
+ my $d = "$local/$n.git/objects"; # absolute path
+ if (my @st = stat($d)) {
+ next if $seen{"$st[0]\0$st[1]"}++;
+ # favor relative paths for rename-friendliness
+ push @new, "../../local/$n.git/objects\n";
+ } else {
+ warn "W: stat($d) failed: $!\n";
+ }
+ }
+ }
for my $ibx (@{$self->{ibx_list}}) {
my $line = $ibx->git->{git_dir} . "/objects\n";
chomp(my $d = $line);
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 1a226cc7..07c7baf8 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -403,6 +403,10 @@ sub add {
if ($smsg) {
$smsg->{blob} = $self->get_mark(":$blob");
$smsg->{raw_bytes} = $n;
+ if (my $oidx = delete $smsg->{-oidx}) { # used by LeiStore
+ return if $oidx->blob_exists($smsg->{blob});
+ }
+ # XXX do we need this? it's in git at this point
$smsg->{-raw_email} = \$raw_email;
}
my $ref = $self->{ref};
diff --git a/lib/PublicInbox/LeiDaemon.pm b/lib/PublicInbox/LeiDaemon.pm
index 89434cb8..20ff0758 100644
--- a/lib/PublicInbox/LeiDaemon.pm
+++ b/lib/PublicInbox/LeiDaemon.pm
@@ -42,7 +42,7 @@ our %CMD = ( # sorted in order of importance/use:
'add-extinbox' => [ 'URL-OR-PATHNAME',
'add/set priority of a publicinbox|extindex for extra matches',
qw(prio=i) ],
-'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex sources',
+'ls-extinbox' => [ '[FILTER]', 'list publicinbox|extindex locations',
qw(format|f=s z local remote) ],
'forget-extinbox' => [ '{URL-OR-PATHNAME|--prune}',
'exclude further results from a publicinbox|extindex',
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
new file mode 100644
index 00000000..c59e2e55
--- /dev/null
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -0,0 +1,40 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::LeiSearch;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::ExtSearch);
+use PublicInbox::Search;
+
+# Translate an over.num into the docid used by the combined (multi-shard)
+# Xapian DB.  This is not generic Xapian, it only works with our sharding
+# scheme.  {nshard} defaults to 1 if it has not been set.
+sub combined_docid ($$) {
+	my ($self, $num) = @_;
+	my $nshard = ($self->{nshard} // 1);
+	($num - 1) * $nshard + $num % $nshard + 1;
+}
+
+# Returns the keywords ('K'-prefixed terms, with the prefix stripped)
+# attached to a message.  $num is either an over.num or an object
+# responding to ->get_docid (e.g. an mset item).
+# List context: sorted list of keywords.
+# Scalar context: hashref with the keywords as keys (values are undef).
+sub msg_keywords {
+	my ($self, $num) = @_; # num_or_mitem
+	my $xdb = $self->xdb; # set {nshard};
+	my $docid = ref($num) ? $num->get_docid : combined_docid($self, $num);
+	my %kw;
+	eval {
+		my $end = $xdb->termlist_end($docid);
+		for (my $cur = $xdb->termlist_begin($docid);
+				$cur != $end; $cur++) {
+			$cur->skip_to('K');
+			last if $cur == $end;
+			my $kw = $cur->get_termname;
+			$kw =~ s/\AK//s and $kw{$kw} = undef;
+		}
+	};
+	# best-effort: warn about failures but return whatever was collected
+	warn "E: #$docid ($num): $@\n" if $@;
+	wantarray ? sort(keys(%kw)) : \%kw;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
new file mode 100644
index 00000000..56f668b8
--- /dev/null
+++ b/lib/PublicInbox/LeiStore.pm
@@ -0,0 +1,197 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Local storage (cache/memo) for lei(1), suitable for personal/private
+# mail iff on encrypted device/FS. Based on v2, but only deduplicates
+# based on git OID.
+#
+# for xref3, the following are constant: $eidx_key = '.', $xnum = -1
+package PublicInbox::LeiStore;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::Lock);
+use PublicInbox::SearchIdx qw(crlf_adjust);
+use PublicInbox::ExtSearchIdx;
+use PublicInbox::Import;
+use PublicInbox::InboxWritable;
+use PublicInbox::V2Writable;
+use PublicInbox::ContentHash qw(content_hash);
+use PublicInbox::MID qw(mids);
+use PublicInbox::LeiSearch;
+
+# Constructor.  $dir and $opt are handed to PublicInbox::ExtSearchIdx->new,
+# and the resulting object is kept as {priv_eidx}.  Blesses into the
+# invoking class so that subclasses get an object of their own type.
+sub new {
+	my ($class, $dir, $opt) = @_;
+	my $eidx = PublicInbox::ExtSearchIdx->new($dir, $opt);
+	bless { priv_eidx => $eidx }, $class;
+}
+
+# read-only: the git handle of the underlying {priv_eidx}
+sub git {
+	my ($self) = @_;
+	$self->{priv_eidx}->git;
+}
+
+# the packing factor is shared with PublicInbox::V2Writable
+sub packing_factor {
+	return $PublicInbox::V2Writable::PACKING_FACTOR;
+}
+
+# Byte threshold used by ->importer to decide when an epoch is full.
+# An explicit {rotate_bytes} wins, otherwise it is derived from 1GiB
+# and ->packing_factor.
+sub rotate_bytes {
+	my ($self) = @_;
+	my $bytes = $self->{rotate_bytes};
+	return $bytes if defined $bytes;
+	(1024 * 1024 * 1024) / $self->packing_factor;
+}
+
+# directory holding the numbered epoch repos ("$N.git") of this store
+sub git_pfx {
+	my ($self) = @_;
+	my $topdir = $self->{priv_eidx}->{topdir};
+	"$topdir/local";
+}
+
+# Returns the highest epoch number found under ->git_pfx.  Returns 0
+# if that directory does not exist or has no "$N.git" entries; dies
+# if the directory exists but cannot be opened.
+sub git_epoch_max {
+	my ($self) = @_;
+	my $pfx = $self->git_pfx;
+	-d $pfx or return 0;
+	opendir(my $dh, $pfx) or die "opendir $pfx: $!\n";
+	my $max = 0;
+	for my $ent (readdir($dh)) {
+		next unless $ent =~ m!\A([0-9]+)\.git\z!;
+		$max = $1 + 0 if $1 > $max;
+	}
+	$max;
+}
+
+sub importer { # returns the cached importer, or opens one on an epoch with room
+ my ($self) = @_;
+ my $max;
+ my $im = $self->{im};
+ if ($im) {
+ return $im if $im->{bytes_added} < $self->rotate_bytes; # still has room
+
+ delete $self->{im}; # over the threshold: finish the current epoch
+ $im->done;
+ undef $im;
+ $self->checkpoint; # NOTE(review): no checkpoint sub is visible in this file; confirm it is inherited
+ $max = $self->git_epoch_max + 1; # continue with the epoch after the highest one
+ }
+ my $pfx = $self->git_pfx;
+ $max //= $self->git_epoch_max; # no cached importer: start at the highest epoch
+ while (1) {
+ my $latest = "$pfx/$max.git";
+ my $old = -e $latest; # checked before init_bare so config is only set on creation
+ my $git = PublicInbox::Git->new($latest);
+ PublicInbox::Import::init_bare({ git => $git });
+ $git->qx(qw(config core.sharedRepository 0600)) if !$old;
+ my $packed_bytes = $git->packed_bytes;
+ my $unpacked_bytes = $packed_bytes / $self->packing_factor; # estimated size before packing
+ if ($unpacked_bytes >= $self->rotate_bytes) {
+ $max++; # this epoch is already full, try the next one
+ next;
+ }
+ chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT)));
+ die "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?;
+ my ($n, $e) = ($i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/g)
+ or die "could not extract name/email from `$i'\n";
+ $self->{im} = $im = PublicInbox::Import->new($git, $n, $e);
+ $im->{bytes_added} = int($packed_bytes / $self->packing_factor); # seed with what the epoch already holds
+ $im->{lock_path} = undef;
+ $im->{path_type} = 'v2';
+ return $im;
+ }
+}
+
+# returns a new PublicInbox::LeiSearch for the top-level directory
+# of this store
+sub search {
+	my ($self) = @_;
+	my $topdir = $self->{priv_eidx}->{topdir};
+	PublicInbox::LeiSearch->new($topdir);
+}
+
+# calls ->idx_init on {priv_eidx} with the -private option set and
+# returns {priv_eidx}
+sub eidx_init {
+	my ($self) = @_;
+	my %opt = (-private => 1);
+	my $priv = $self->{priv_eidx};
+	$priv->idx_init(\%opt);
+	$priv;
+}
+
+# Returns the numerically sorted docids of all stored messages which
+# share a Message-ID with $eml and have the same content_hash.
+# Blobs are read via the active importer (if any), falling back to
+# the extindex git handle; blobs which cannot be found are warned
+# about and skipped.
+sub _docids_for ($$) {
+	my ($self, $eml) = @_;
+	my %found;
+	my $want = content_hash($eml);
+	my $eidx = eidx_init($self);
+	my $oidx = $eidx->{oidx};
+	my $im = $self->{im};
+	for my $mid (@{mids($eml)}) {
+		my ($id, $prev);
+		while (my $smsg = $oidx->next_by_mid($mid, \$id, \$prev)) {
+			my $oid = $smsg->{blob};
+			my $docid = $smsg->{num};
+			my $bref;
+			$bref = $im->cat_blob($oid) if $im;
+			$bref //= $eidx->git->cat_file($oid);
+			unless (defined $bref) {
+				warn "W: $oid (#$docid) <$mid> not found\n";
+				next;
+			}
+			local $self->{current_info} = $oid;
+			my $stored = PublicInbox::Eml->new($bref);
+			next if content_hash($stored) ne $want;
+			$found{$docid} = $docid;
+		}
+	}
+	sort { $a <=> $b } values %found;
+}
+
+# Calls shard_set_keywords with @kw for every docid matching $eml.
+# Returns an arrayref of the affected docids.
+sub set_eml_keywords {
+	my ($self, $eml, @kw) = @_;
+	my $eidx = eidx_init($self);
+	my $docids = [ _docids_for($self, $eml) ];
+	$eidx->idx_shard($_)->shard_set_keywords($_, @kw) for @$docids;
+	$docids;
+}
+
+# Calls shard_add_keywords with @kw for every docid matching $eml.
+# Returns an arrayref of the affected docids.
+sub add_eml_keywords {
+	my ($self, $eml, @kw) = @_;
+	my $eidx = eidx_init($self);
+	my $docids = [ _docids_for($self, $eml) ];
+	$eidx->idx_shard($_)->shard_add_keywords($_, @kw) for @$docids;
+	$docids;
+}
+
+# Calls shard_remove_keywords with @kw for every docid matching $eml.
+# Returns an arrayref of the affected docids.
+sub remove_eml_keywords {
+	my ($self, $eml, @kw) = @_;
+	my $eidx = eidx_init($self);
+	my $docids = [ _docids_for($self, $eml) ];
+	$eidx->idx_shard($_)->shard_remove_keywords($_, @kw) for @$docids;
+	$docids;
+}
+
+sub add_eml { # stores $eml; returns its blob OID, or nothing for a duplicate
+ my ($self, $eml) = @_;
+ my $eidx = eidx_init($self);
+ my $oidx = $eidx->{oidx};
+ my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; # -oidx lets Import::add skip blobs already in xref3
+ my $im = $self->importer;
+ $im->add($eml, undef, $smsg) or return; # duplicate returns undef
+ my $msgref = delete $smsg->{-raw_email};
+ $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
+
+ local $self->{current_info} = $smsg->{blob};
+ if (my @docids = _docids_for($self, $eml)) { # same content is already indexed
+ for my $docid (@docids) {
+ my $idx = $eidx->idx_shard($docid);
+ $oidx->add_xref3($docid, -1, $smsg->{blob}, '.'); # $xnum = -1, $eidx_key = '.'
+ $idx->shard_add_eidx_info($docid, '.', $eml); # List-Id
+ }
+ } else { # new content: allocate a docid and index it
+ $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
+ $oidx->add_overview($eml, $smsg);
+ $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+ my $idx = $eidx->idx_shard($smsg->{num});
+ $idx->index_raw($msgref, $eml, $smsg);
+ }
+ $smsg->{blob} # return value
+}
+
+# Finishes the active importer (if any) and then {priv_eidx}.
+# An importer failure is warned about right away, but only re-thrown
+# after {priv_eidx}->done has been called.
+sub done {
+	my ($self) = @_;
+	my $failure = '';
+	if (my $im = delete($self->{im})) {
+		eval { $im->done };
+		if (my $e = $@) {
+			$failure .= "import done: $e\n";
+			warn $failure;
+		}
+	}
+	$self->{priv_eidx}->done;
+	die $failure if $failure;
+}
+
+1;
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 4a39bf53..c8630ddb 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -684,4 +684,14 @@ DELETE FROM eidxq WHERE docid = ?
}
+# Returns the number of xref3 rows whose oidbin matches the given
+# hex object ID, so the result is false when the blob is unknown.
+sub blob_exists {
+	my ($self, $oidhex) = @_;
+	my $oidbin = pack('H*', $oidhex);
+	my $sth = $self->dbh->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE oidbin = ?
+
+	$sth->bind_param(1, $oidbin, SQL_BLOB);
+	$sth->execute;
+	$sth->fetchrow_array;
+}
+
1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c6d2a0e8..ad71bc13 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1,6 +1,6 @@
# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# based on notmuch, but with no concept of folders, files or flags
+# based on notmuch, but with no concept of folders, files
#
# Indexes mail with Xapian and our (SQLite-based) ::Msgmap for use
# with the web and NNTP interfaces. This index maintains thread
@@ -371,7 +371,7 @@ sub eml2doc ($$$;$) {
index_headers($self, $smsg);
if (defined(my $eidx_key = $smsg->{eidx_key})) {
- $doc->add_boolean_term('O'.$eidx_key);
+ $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
}
msg_iter($eml, \&index_xapian, [ $self, $doc ]);
index_ids($self, $doc, $eml, $mids);
@@ -467,7 +467,7 @@ sub add_eidx_info {
begin_txn_lazy($self);
my $doc = _get_doc($self, $docid) or return;
term_generator($self)->set_document($doc);
- $doc->add_boolean_term('O'.$eidx_key);
+ $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.';
index_list_id($self, $doc, $eml);
$self->{xdb}->replace_document($docid, $doc);
}
@@ -501,6 +501,47 @@ sub remove_eidx_info {
$self->{xdb}->replace_document($docid, $doc);
}
+# Make the 'K' (keyword) terms of $docid match @kw exactly.
+# The document is only replaced when a term has to be added or removed.
+sub set_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid) or return;
+	my %want = map { $_ => 1 } @kw;
+	my %missing = %want; # whittled down to keywords not yet on $doc
+	my @stale; # keywords on $doc which are no longer wanted
+	my $end = $doc->termlist_end;
+	for (my $it = $doc->termlist_begin; $it != $end; $it++) {
+		$it->skip_to('K');
+		last if $it == $end;
+		(my $name = $it->get_termname) =~ s/\AK//s or next;
+		if ($want{$name}) {
+			delete $missing{$name};
+		} else {
+			push @stale, $name;
+		}
+	}
+	return if !@stale && !%missing;
+	$doc->remove_term('K'.$_) for @stale;
+	$doc->add_boolean_term('K'.$_) for (keys %missing);
+	$self->{xdb}->replace_document($docid, $doc);
+}
+
+# Adds a 'K'-prefixed boolean term for each of @kw to $docid and
+# replaces the document.
+sub add_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid) or return;
+	for my $kw (@kw) {
+		$doc->add_boolean_term('K'.$kw);
+	}
+	$self->{xdb}->replace_document($docid, $doc);
+}
+
+# Removes the 'K'-prefixed term for each of @kw from $docid.
+# The document is only replaced if at least one removal succeeded.
+sub remove_keywords {
+	my ($self, $docid, @kw) = @_;
+	begin_txn_lazy($self);
+	my $doc = _get_doc($self, $docid) or return;
+	my $changed;
+	for my $kw (@kw) {
+		# a failing remove_term is ignored on purpose; only a
+		# successful removal marks the document as changed
+		eval {
+			$doc->remove_term('K'.$kw);
+			$changed = 1;
+		};
+	}
+	$self->{xdb}->replace_document($docid, $doc) if $changed;
+}
+
sub get_val ($$) {
my ($doc, $col) = @_;
sortable_unserialise($doc->get_value($col));
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 2e654769..87b0bad6 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -89,6 +89,12 @@ sub shard_worker_loop ($$$$$) {
my ($len, $docid, $eidx_key) = split(/ /, $line, 3);
$self->remove_eidx_info($docid, $eidx_key,
eml($r, $len));
+ } elsif ($line =~ s/\A=K (\d+) //) {
+ $self->set_keywords($1 + 0, split(/ /, $line));
+ } elsif ($line =~ s/\A-K (\d+) //) {
+ $self->remove_keywords($1 + 0, split(/ /, $line));
+ } elsif ($line =~ s/\A\+K (\d+) //) {
+ $self->add_keywords($1 + 0, split(/ /, $line));
} elsif ($line =~ s/\AO ([^\n]+)//) {
my $over_fn = $1;
$over_fn =~ tr/\0/\n/;
@@ -210,6 +216,33 @@ sub shard_remove {
}
}
+sub shard_set_keywords { # replace the keywords of $docid with exactly @kw
+ my ($self, $docid, @kw) = @_;
+ if (my $w = $self->{w}) { # triggers set_keywords in a shard child
+ print $w "=K $docid @kw\n" or die "failed to write: $!";
+ } else { # same process
+ $self->set_keywords($docid, @kw);
+ }
+}
+
+sub shard_remove_keywords { # remove the given keywords from $docid
+ my ($self, $docid, @kw) = @_;
+ if (my $w = $self->{w}) { # triggers remove_keywords in a shard child
+ print $w "-K $docid @kw\n" or die "failed to write: $!";
+ } else { # same process
+ $self->remove_keywords($docid, @kw);
+ }
+}
+
+sub shard_add_keywords { # add the given keywords to $docid
+ my ($self, $docid, @kw) = @_;
+ if (my $w = $self->{w}) { # triggers add_keywords in a shard child
+ print $w "+K $docid @kw\n" or die "failed to write: $!";
+ } else { # same process
+ $self->add_keywords($docid, @kw);
+ }
+}
+
sub shard_over_check {
my ($self, $over) = @_;
if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 992305c5..b98b4695 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -24,7 +24,7 @@ use File::Temp ();
my $OID = qr/[a-f0-9]{40,}/;
# an estimate of the post-packed size to the raw uncompressed size
-my $PACKING_FACTOR = 0.4;
+our $PACKING_FACTOR = 0.4;
# SATA storage lags behind what CPUs are capable of, so relying on
# nproc(1) can be misleading and having extra Xapian shards is a
diff --git a/t/lei_store.t b/t/lei_store.t
new file mode 100644
index 00000000..c18a9620
--- /dev/null
+++ b/t/lei_store.t
@@ -0,0 +1,74 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use Test::More;
+use PublicInbox::TestCommon;
+require_mods(qw(DBD::SQLite Search::Xapian));
+require_git 2.6;
+require_ok 'PublicInbox::LeiStore';
+require_ok 'PublicInbox::ExtSearch';
+# create a store in a temporary directory
+my ($home, $for_destroy) = tmpdir();
+my $store_dir = "$home/lst";
+my $lst = PublicInbox::LeiStore->new($store_dir, { creat => 1 });
+ok($lst, '->new');
+# adding the same message a second time must be reported as a duplicate
+my $oid = $lst->add_eml(eml_load('t/data/0001.patch'));
+like($oid, qr/\A[0-9a-f]+\z/, 'add returned OID');
+my $eml = eml_load('t/data/0001.patch');
+is($lst->add_eml($eml), undef, 'idempotent');
+$lst->done;
+{ # read the stored message back via the search object
+ my $es = $lst->search;
+ my $msgs = $es->over->query_xover(0, 1000);
+ is(scalar(@$msgs), 1, 'one message');
+ is($msgs->[0]->{blob}, $oid, 'blob matches');
+ my $mset = $es->mset("mid:$msgs->[0]->{mid}");
+ is($mset->size, 1, 'search works');
+ is_deeply($es->mset_to_artnums($mset), [ $msgs->[0]->{num} ],
+ 'mset_to_artnums');
+ my @kw = $es->msg_keywords(($mset->items)[0]); # mset item instead of a number
+ is_deeply(\@kw, [], 'no flags');
+}
+
+for my $parallel (0, 1) { # same checks with {parallel} off and on
+ $lst->{priv_eidx}->{parallel} = $parallel;
+ my $docids = $lst->set_eml_keywords($eml, qw(seen draft));
+ is(scalar @$docids, 1, 'set keywords on one doc');
+ $lst->done;
+ my @kw = $lst->search->msg_keywords($docids->[0]); # list context: sorted keywords
+ is_deeply(\@kw, [qw(draft seen)], 'kw matches');
+
+ $docids = $lst->add_eml_keywords($eml, qw(seen draft)); # already set, so nothing changes
+ $lst->done;
+ is(scalar @$docids, 1, 'idempotently added keywords to doc');
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [qw(draft seen)], 'kw matches after noop');
+
+ $docids = $lst->remove_eml_keywords($eml, qw(seen draft));
+ is(scalar @$docids, 1, 'removed from one doc');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'kw matches after remove');
+
+ $docids = $lst->remove_eml_keywords($eml, qw(answered)); # keyword was never set
+ is(scalar @$docids, 1, 'removed from one doc (idempotently)');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'kw matches after remove (idempotent)');
+
+ $docids = $lst->add_eml_keywords($eml, qw(answered));
+ is(scalar @$docids, 1, 'added to empty doc');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, ['answered'], 'kw matches after add');
+
+ $docids = $lst->set_eml_keywords($eml); # empty keyword list
+ is(scalar @$docids, 1, 'set to clobber');
+ $lst->done;
+ @kw = $lst->search->msg_keywords($docids->[0]);
+ is_deeply(\@kw, [], 'set clobbers all');
+}
+
+done_testing;
next prev parent reply other threads:[~2020-12-15 11:47 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-15 11:47 [PATCH/RFC 0/7] lei - Local Email Interface skeleton Eric Wong
2020-12-15 11:47 ` [PATCH 1/7] daemon: support --daemonize without Net::Server::Daemonize Eric Wong
2020-12-15 11:47 ` [PATCH 2/7] daemon: simplify fork() failure checks Eric Wong
2020-12-15 11:47 ` [RFC 3/7] lei: FD-passing and IPC basics Eric Wong
2020-12-15 11:47 ` [RFC 4/7] lei: proposed command-listing and options Eric Wong
2020-12-26 11:26 ` "extinbox" term - was: [RFC 4/7] lei: proposed command-listing Eric Wong
2020-12-28 15:29 ` Kyle Meyer
2020-12-28 21:55 ` Eric Wong
2020-12-29 3:01 ` Kyle Meyer
2020-12-15 11:47 ` Eric Wong [this message]
2020-12-15 11:47 ` [RFC 6/7] tests: more common JSON module loading Eric Wong
2020-12-15 11:47 ` [RFC 7/7] lei: use spawn (vfork + execve) for lazy start Eric Wong
2020-12-15 12:05 ` more considerations in UI/UX Eric Wong
2020-12-23 5:42 ` Kyle Meyer
2020-12-23 9:47 ` Eric Wong
2020-12-23 15:49 ` Kyle Meyer
2020-12-26 11:13 ` [RFC] lei: rename proposed "query" command to "q", add JSON output Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201215114722.27400-6-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).