* [PATCH 1/2] lei: saved searches support --dedupe=<mid|oid>
2021-04-23 1:45 [PATCH 0/2] "lei up" surprise reduction fixes Eric Wong
@ 2021-04-23 1:45 ` Eric Wong
2021-04-23 1:45 ` [PATCH 2/2] lei up: support symlinked pathnames Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2021-04-23 1:45 UTC (permalink / raw)
To: meta
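Saved searches now record --dedupe=<mid|oid> at "lei q --save" time
and honor it again on "lei up".  This is less surprising for users
who are accustomed to passing --dedupe= without --save.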
---
lib/PublicInbox/LeiSavedSearch.pm | 27 ++++++++++++++++++++--
lib/PublicInbox/LeiUp.pm | 4 ++++
t/lei-q-save.t | 37 +++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index cd9effce..ed217cf2 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -11,6 +11,7 @@ use PublicInbox::LeiSearch;
use PublicInbox::Config;
use PublicInbox::Spawn qw(run_die);
use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::MID qw(mids_for_index);
use Digest::SHA qw(sha256_hex);
# move this to PublicInbox::Config if other things use it:
@@ -65,6 +66,14 @@ sub list {
} @$out
}
+sub translate_dedupe ($$$) {
+ my ($self, $lei, $dd) = @_;
+ $dd //= 'content';
+ return 1 if $dd eq 'content'; # the default
+ return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid');
+ $lei->fail("--dedupe=$dd unsupported with --save");
+}
+
sub up { # updating existing saved search via "lei up"
my ($cls, $lei, $dst) = @_;
my $f;
@@ -89,6 +98,8 @@ sub new { # new saved search "lei q --save"
File::Path::make_path($dir); # raises on error
$self->{-cfg} = {};
my $f = $self->{'-f'} = "$dir/lei.saved-search";
+ my $dd = $lei->{opt}->{dedupe};
+ translate_dedupe($self, $lei, $dd) or return;
open my $fh, '>', $f or return $lei->fail("open $f: $!");
my $sq_dst = PublicInbox::Config::squote_maybe($dst);
my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing';
@@ -105,6 +116,7 @@ sub new { # new saved search "lei q --save"
[lei "q"]
output = $dst
EOM
+ print $fh "\tdedupe = $dd\n" if $dd;
for my $k (ARRAY_FIELDS) {
my $ary = $lei->{opt}->{$k} // next;
for my $x (@$ary) {
@@ -134,14 +146,25 @@ sub is_dup {
my ($self, $eml, $smsg) = @_;
my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
my $blob = $smsg ? $smsg->{blob} : undef;
- return 1 if $blob && $oidx->blob_exists($blob);
my $lk = $self->lock_for_scope_fast;
+ return 1 if $blob && $oidx->blob_exists($blob);
+ if ($self->{-dedupe_mid}) {
+ for my $mid (@{mids_for_index($eml)}) {
+ my ($id, $prv);
+ return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
+ }
+ }
if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
for my $docid (values %$xoids) {
$oidx->add_xref3($docid, -1, $blob, '.');
}
$oidx->commit_lazy;
- 1;
+ if ($self->{-dedupe_oid}) {
+ $smsg->{blob} //= git_sha(1, $eml)->hexdigest;
+ exists $xoids->{$smsg->{blob}} ? 1 : undef;
+ } else {
+ 1;
+ }
} else {
# n.b. above xoids_for fills out eml->{-lei_fake_mid} if needed
unless ($smsg) {
diff --git a/lib/PublicInbox/LeiUp.pm b/lib/PublicInbox/LeiUp.pm
index 0fb9698b..f4ff070b 100644
--- a/lib/PublicInbox/LeiUp.pm
+++ b/lib/PublicInbox/LeiUp.pm
@@ -25,6 +25,10 @@ sub up1 ($$) {
my $o = $lei->{opt}->{output} = $lss->{-cfg}->{'lei.q.output'} //
return $lei->fail("lei.q.output unset in $f");
ref($o) and return $lei->fail("multiple values of lei.q.output in $f");
+ if (defined(my $dd = $lss->{-cfg}->{'lei.q.dedupe'})) {
+ $lss->translate_dedupe($lei, $dd) or return;
+ $lei->{opt}->{dedupe} = $dd;
+ }
for my $k (qw(only include exclude)) {
my $v = $lss->{-cfg}->get_all("lei.q.$k") // next;
$lei->{opt}->{$k} = $v;
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 5a2f7fff..26ea5cb8 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -121,5 +121,42 @@ test_lei(sub {
unlike($lei_out, qr/mbrd-aug/,
'forget-search completion cleared after forget');
ok(!lei('up', "$home/mbrd-aug"), 'lei up fails after forget');
+
+ # dedupe=mid
+ my $o = "$home/dd-mid";
+ $in = $doc2->as_string . "\n-------\nappended list sig\n";
+ lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt };
+ lei_ok(qw(q --dedupe=mid --save m:testmessage@example.com -o), $o);
+ my @m = glob("$o/cur/*");
+ is(scalar(@m), 1, '--dedupe=mid w/ --save');
+ $in = $doc2->as_string . "\n-------\nanother list sig\n";
+ lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt };
+ lei_ok 'up', $o;
+ is_deeply([glob("$o/cur/*")], \@m, 'lei up dedupe=mid works');
+
+ for my $dd (qw(content)) {
+ $o = "$home/dd-$dd";
+ lei_ok(qw(q --save m:testmessage@example.com -o), $o,
+ "--dedupe=$dd");
+ @m = glob("$o/cur/*");
+ is(scalar(@m), 3, 'all 3 matches with dedupe='.$dd);
+ }
+
+ # dedupe=oid
+ $o = "$home/dd-oid";
+ my $ibx = create_inbox 'ibx', indexlevel => 'medium',
+ tmpdir => "$home/v1", sub {};
+ lei_ok(qw(q --save --dedupe=oid m:qp@example.com -o), $o,
+ '-I', $ibx->{inboxdir});
+ @m = glob("$o/cur/*");
+ is(scalar(@m), 1, 'got first result');
+
+ my $im = $ibx->importer(0);
+ my $diff = "X-Insignificant-Header: x\n".$doc1->as_string;
+ $im->add(PublicInbox::Eml->new($diff));
+ $im->done;
+ lei_ok('up', $o);
+ @m = glob("$o/cur/*");
+ is(scalar(@m), 2, 'got 2nd result due to different OID');
});
done_testing;
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] lei up: support symlinked pathnames
2021-04-23 1:45 [PATCH 0/2] "lei up" surprise reduction fixes Eric Wong
2021-04-23 1:45 ` [PATCH 1/2] lei: saved searches support --dedupe=<mid|oid> Eric Wong
@ 2021-04-23 1:45 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2021-04-23 1:45 UTC (permalink / raw)
To: meta
On my default FreeBSD 11.x system, "/home" is a symlink to
"/usr/home", which causes "lei up" path resolution to fail when
I use outputs in $HOME. Fall back to a slow path of globbing
and matching pathnames based on st_ino+st_dev.
---
lib/PublicInbox/LeiSavedSearch.pm | 40 ++++++++++++++++++++++++-------
t/lei-q-save.t | 6 +++++
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index ed217cf2..af864a50 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -13,6 +13,7 @@ use PublicInbox::Spawn qw(run_die);
use PublicInbox::ContentHash qw(git_sha);
use PublicInbox::MID qw(mids_for_index);
use Digest::SHA qw(sha256_hex);
+my $LOCAL_PFX = qr!\A(?:maildir|mh|mbox.+|mmdf):!i; # TODO: put in LeiToMail?
# move this to PublicInbox::Config if other things use it:
my %cquote = ("\n" => '\\n', "\t" => '\\t', "\b" => '\\b');
@@ -27,27 +28,50 @@ sub BOOL_FIELDS () {
qw(external local remote import-remote import-before threads)
}
-sub lss_dir_for ($$) {
- my ($lei, $dstref) = @_;
+sub lss_dir_for ($$;$) {
+ my ($lei, $dstref, $on_fs) = @_;
my @n;
if ($$dstref =~ m,\Aimaps?://,i) { # already canonicalized
require PublicInbox::URIimap;
my $uri = PublicInbox::URIimap->new($$dstref)->canonical;
$$dstref = $$uri;
@n = ($uri->mailbox);
- } else { # basename
+ } else {
+ # can't use Cwd::abs_path since dirname($$dstref) may not exist
$$dstref = $lei->rel2abs($$dstref);
+ # Maildirs have trailing '/' internally
$$dstref .= '/' if -d $$dstref;
$$dstref =~ tr!/!/!s;
- @n = ($$dstref =~ m{([^/]+)/*\z});
+ @n = ($$dstref =~ m{([^/]+)/*\z}); # basename
}
push @n, sha256_hex($$dstref);
- $lei->share_path . '/saved-searches/' . join('-', @n);
+ my $lss_dir = $lei->share_path . '/saved-searches/';
+ my $d = $lss_dir . join('-', @n);
+
+ # fall back to looking up by st_ino + st_dev in case we're in
+ # a symlinked or bind-mounted path
+ if ($on_fs && !-d $d && -e $$dstref) {
+ my @cur = stat(_);
+ my $want = pack('dd', @cur[1,0]); # st_ino + st_dev
+ my ($c, $o, @st);
+ for my $g ("$n[0]-*", '*') {
+ my @maybe = glob("$lss_dir$g/lei.saved-search");
+ for my $f (@maybe) {
+ $c = PublicInbox::Config->git_config_dump($f);
+ $o = $c->{'lei.q.output'} // next;
+ $o =~ s!$LOCAL_PFX!! or next;
+ @st = stat($o) or next;
+ next if pack('dd', @st[1,0]) ne $want;
+ $f =~ m!\A(.+?)/[^/]+\z! and return $1;
+ }
+ }
+ }
+ $d;
}
sub list {
my ($lei, $pfx) = @_;
- my $lss_dir = $lei->share_path.'/saved-searches/';
+ my $lss_dir = $lei->share_path.'/saved-searches';
return () unless -d $lss_dir;
# TODO: persist the cache? Use another format?
my $f = $lei->cache_dir."/saved-tmp.$$.".time.'.config';
@@ -61,7 +85,7 @@ sub list {
unlink($f);
my $out = $cfg->get_all('lei.q.output') or return ();
map {;
- s!\A(?:maildir|mh|mbox.+|mmdf):!!i;
+ s!$LOCAL_PFX!!;
$_;
} @$out
}
@@ -221,7 +245,7 @@ sub cloneurl { [] }
sub output2lssdir {
my ($self, $lei, $dir_ref, $fn_ref) = @_;
my $dst = $$dir_ref; # imap://$MAILBOX, /path/to/maildir, /path/to/mbox
- my $dir = lss_dir_for($lei, \$dst);
+ my $dir = lss_dir_for($lei, \$dst, 1);
my $f = "$dir/lei.saved-search";
if (-f $f && -r _) {
$self->{-cfg} = PublicInbox::Config->git_config_dump($f);
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 26ea5cb8..170f7ce5 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -158,5 +158,11 @@ test_lei(sub {
lei_ok('up', $o);
@m = glob("$o/cur/*");
is(scalar(@m), 2, 'got 2nd result due to different OID');
+
+ SKIP: {
+ symlink($o, "$home/ln -s") or
+ skip "symlinks not supported in $home?: $!", 1;
+ lei_ok('up', "$home/ln -s");
+ };
});
done_testing;
^ permalink raw reply related [flat|nested] 3+ messages in thread