unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] "lei up" surprise reduction fixes
@ 2021-04-23  1:45 Eric Wong
  2021-04-23  1:45 ` [PATCH 1/2] lei: saved searches support --dedupe=<mid|oid> Eric Wong
  2021-04-23  1:45 ` [PATCH 2/2] lei up: support symlinked pathnames Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2021-04-23  1:45 UTC (permalink / raw)
  To: meta

Eric Wong (2):
  lei: saved searches support --dedupe=<mid|oid>
  lei up: support symlinked pathnames

 lib/PublicInbox/LeiSavedSearch.pm | 67 ++++++++++++++++++++++++++-----
 lib/PublicInbox/LeiUp.pm          |  4 ++
 t/lei-q-save.t                    | 43 ++++++++++++++++++++
 3 files changed, 104 insertions(+), 10 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] lei: saved searches support --dedupe=<mid|oid>
  2021-04-23  1:45 [PATCH 0/2] "lei up" surprise reduction fixes Eric Wong
@ 2021-04-23  1:45 ` Eric Wong
  2021-04-23  1:45 ` [PATCH 2/2] lei up: support symlinked pathnames Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2021-04-23  1:45 UTC (permalink / raw)
  To: meta

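Honor --dedupe=<mid|oid> for saved searches, both when creating
them ("lei q --save") and when updating them ("lei up").  This is
less surprising for users who are used to passing --dedupe=
without --save.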
---
 lib/PublicInbox/LeiSavedSearch.pm | 27 ++++++++++++++++++++--
 lib/PublicInbox/LeiUp.pm          |  4 ++++
 t/lei-q-save.t                    | 37 +++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index cd9effce..ed217cf2 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -11,6 +11,7 @@ use PublicInbox::LeiSearch;
 use PublicInbox::Config;
 use PublicInbox::Spawn qw(run_die);
 use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::MID qw(mids_for_index);
 use Digest::SHA qw(sha256_hex);
 
 # move this to PublicInbox::Config if other things use it:
@@ -65,6 +66,14 @@ sub list {
 	} @$out
 }
 
+sub translate_dedupe ($$$) {
+	my ($self, $lei, $dd) = @_;
+	$dd //= 'content';
+	return 1 if $dd eq 'content'; # the default
+	return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid');
+	$lei->fail("--dedupe=$dd unsupported with --save");
+}
+
 sub up { # updating existing saved search via "lei up"
 	my ($cls, $lei, $dst) = @_;
 	my $f;
@@ -89,6 +98,8 @@ sub new { # new saved search "lei q --save"
 	File::Path::make_path($dir); # raises on error
 	$self->{-cfg} = {};
 	my $f = $self->{'-f'} = "$dir/lei.saved-search";
+	my $dd = $lei->{opt}->{dedupe};
+	translate_dedupe($self, $lei, $dd) or return;
 	open my $fh, '>', $f or return $lei->fail("open $f: $!");
 	my $sq_dst = PublicInbox::Config::squote_maybe($dst);
 	my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing';
@@ -105,6 +116,7 @@ sub new { # new saved search "lei q --save"
 [lei "q"]
 	output = $dst
 EOM
+	print $fh "\tdedupe = $dd\n" if $dd;
 	for my $k (ARRAY_FIELDS) {
 		my $ary = $lei->{opt}->{$k} // next;
 		for my $x (@$ary) {
@@ -134,14 +146,25 @@ sub is_dup {
 	my ($self, $eml, $smsg) = @_;
 	my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
 	my $blob = $smsg ? $smsg->{blob} : undef;
-	return 1 if $blob && $oidx->blob_exists($blob);
 	my $lk = $self->lock_for_scope_fast;
+	return 1 if $blob && $oidx->blob_exists($blob);
+	if ($self->{-dedupe_mid}) {
+		for my $mid (@{mids_for_index($eml)}) {
+			my ($id, $prv);
+			return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
+		}
+	}
 	if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
 		for my $docid (values %$xoids) {
 			$oidx->add_xref3($docid, -1, $blob, '.');
 		}
 		$oidx->commit_lazy;
-		1;
+		if ($self->{-dedupe_oid}) {
+			$smsg->{blob} //= git_sha(1, $eml)->hexdigest;
+			exists $xoids->{$smsg->{blob}} ? 1 : undef;
+		} else {
+			1;
+		}
 	} else {
 		# n.b. above xoids_for fills out eml->{-lei_fake_mid} if needed
 		unless ($smsg) {
diff --git a/lib/PublicInbox/LeiUp.pm b/lib/PublicInbox/LeiUp.pm
index 0fb9698b..f4ff070b 100644
--- a/lib/PublicInbox/LeiUp.pm
+++ b/lib/PublicInbox/LeiUp.pm
@@ -25,6 +25,10 @@ sub up1 ($$) {
 	my $o = $lei->{opt}->{output} = $lss->{-cfg}->{'lei.q.output'} //
 		return $lei->fail("lei.q.output unset in $f");
 	ref($o) and return $lei->fail("multiple values of lei.q.output in $f");
+	if (defined(my $dd = $lss->{-cfg}->{'lei.q.dedupe'})) {
+		$lss->translate_dedupe($lei, $dd) or return;
+		$lei->{opt}->{dedupe} = $dd;
+	}
 	for my $k (qw(only include exclude)) {
 		my $v = $lss->{-cfg}->get_all("lei.q.$k") // next;
 		$lei->{opt}->{$k} = $v;
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 5a2f7fff..26ea5cb8 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -121,5 +121,42 @@ test_lei(sub {
 	unlike($lei_out, qr/mbrd-aug/,
 		'forget-search completion cleared after forget');
 	ok(!lei('up', "$home/mbrd-aug"), 'lei up fails after forget');
+
+	# dedupe=mid
+	my $o = "$home/dd-mid";
+	$in = $doc2->as_string . "\n-------\nappended list sig\n";
+	lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt };
+	lei_ok(qw(q --dedupe=mid --save m:testmessage@example.com -o), $o);
+	my @m = glob("$o/cur/*");
+	is(scalar(@m), 1, '--dedupe=mid w/ --save');
+	$in = $doc2->as_string . "\n-------\nanother list sig\n";
+	lei_ok [qw(import -q -F eml -)], undef, { 0 => \$in, %$lei_opt };
+	lei_ok 'up', $o;
+	is_deeply([glob("$o/cur/*")], \@m, 'lei up dedupe=mid works');
+
+	for my $dd (qw(content)) {
+		$o = "$home/dd-$dd";
+		lei_ok(qw(q --save m:testmessage@example.com -o), $o,
+				"--dedupe=$dd");
+		@m = glob("$o/cur/*");
+		is(scalar(@m), 3, 'all 3 matches with dedupe='.$dd);
+	}
+
+	# dedupe=oid
+	$o = "$home/dd-oid";
+	my $ibx = create_inbox 'ibx', indexlevel => 'medium',
+			tmpdir => "$home/v1", sub {};
+	lei_ok(qw(q --save --dedupe=oid m:qp@example.com -o), $o,
+		'-I', $ibx->{inboxdir});
+	@m = glob("$o/cur/*");
+	is(scalar(@m), 1, 'got first result');
+
+	my $im = $ibx->importer(0);
+	my $diff = "X-Insignificant-Header: x\n".$doc1->as_string;
+	$im->add(PublicInbox::Eml->new($diff));
+	$im->done;
+	lei_ok('up', $o);
+	@m = glob("$o/cur/*");
+	is(scalar(@m), 2, 'got 2nd result due to different OID');
 });
 done_testing;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] lei up: support symlinked pathnames
  2021-04-23  1:45 [PATCH 0/2] "lei up" surprise reduction fixes Eric Wong
  2021-04-23  1:45 ` [PATCH 1/2] lei: saved searches support --dedupe=<mid|oid> Eric Wong
@ 2021-04-23  1:45 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2021-04-23  1:45 UTC (permalink / raw)
  To: meta

On my default FreeBSD 11.x system, "/home" is a symlink to
"/usr/home", which causes "lei up" path resolution to fail when
I use outputs in $HOME.  Fall back to a slow path of globbing
and matching pathnames based on st_ino+st_dev.
---
 lib/PublicInbox/LeiSavedSearch.pm | 40 ++++++++++++++++++++++++-------
 t/lei-q-save.t                    |  6 +++++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index ed217cf2..af864a50 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -13,6 +13,7 @@ use PublicInbox::Spawn qw(run_die);
 use PublicInbox::ContentHash qw(git_sha);
 use PublicInbox::MID qw(mids_for_index);
 use Digest::SHA qw(sha256_hex);
+my $LOCAL_PFX = qr!\A(?:maildir|mh|mbox.+|mmdf):!i; # TODO: put in LeiToMail?
 
 # move this to PublicInbox::Config if other things use it:
 my %cquote = ("\n" => '\\n', "\t" => '\\t', "\b" => '\\b');
@@ -27,27 +28,50 @@ sub BOOL_FIELDS () {
 	qw(external local remote import-remote import-before threads)
 }
 
-sub lss_dir_for ($$) {
-	my ($lei, $dstref) = @_;
+sub lss_dir_for ($$;$) {
+	my ($lei, $dstref, $on_fs) = @_;
 	my @n;
 	if ($$dstref =~ m,\Aimaps?://,i) { # already canonicalized
 		require PublicInbox::URIimap;
 		my $uri = PublicInbox::URIimap->new($$dstref)->canonical;
 		$$dstref = $$uri;
 		@n = ($uri->mailbox);
-	} else { # basename
+	} else {
+		# can't use Cwd::abs_path since dirname($$dstref) may not exist
 		$$dstref = $lei->rel2abs($$dstref);
+		# Maildirs have trailing '/' internally
 		$$dstref .= '/' if -d $$dstref;
 		$$dstref =~ tr!/!/!s;
-		@n = ($$dstref =~ m{([^/]+)/*\z});
+		@n = ($$dstref =~ m{([^/]+)/*\z}); # basename
 	}
 	push @n, sha256_hex($$dstref);
-	$lei->share_path . '/saved-searches/' . join('-', @n);
+	my $lss_dir = $lei->share_path . '/saved-searches/';
+	my $d = $lss_dir . join('-', @n);
+
+	# fall-back to looking up by st_ino + st_dev in case we're in
+	# a symlinked or bind-mounted path
+	if ($on_fs && !-d $d && -e $$dstref) {
+		my @cur = stat(_);
+		my $want = pack('dd', @cur[1,0]); # st_ino + st_dev
+		my ($c, $o, @st);
+		for my $g ("$n[0]-*", '*') {
+			my @maybe = glob("$lss_dir$g/lei.saved-search");
+			for my $f (@maybe) {
+				$c = PublicInbox::Config->git_config_dump($f);
+				$o = $c->{'lei.q.output'} // next;
+				$o =~ s!$LOCAL_PFX!! or next;
+				@st = stat($o) or next;
+				next if pack('dd', @st[1,0]) ne $want;
+				$f =~ m!\A(.+?)/[^/]+\z! and return $1;
+			}
+		}
+	}
+	$d;
 }
 
 sub list {
 	my ($lei, $pfx) = @_;
-	my $lss_dir = $lei->share_path.'/saved-searches/';
+	my $lss_dir = $lei->share_path.'/saved-searches';
 	return () unless -d $lss_dir;
 	# TODO: persist the cache?  Use another format?
 	my $f = $lei->cache_dir."/saved-tmp.$$.".time.'.config';
@@ -61,7 +85,7 @@ sub list {
 	unlink($f);
 	my $out = $cfg->get_all('lei.q.output') or return ();
 	map {;
-		s!\A(?:maildir|mh|mbox.+|mmdf):!!i;
+		s!$LOCAL_PFX!!;
 		$_;
 	} @$out
 }
@@ -221,7 +245,7 @@ sub cloneurl { [] }
 sub output2lssdir {
 	my ($self, $lei, $dir_ref, $fn_ref) = @_;
 	my $dst = $$dir_ref; # imap://$MAILBOX, /path/to/maildir, /path/to/mbox
-	my $dir = lss_dir_for($lei, \$dst);
+	my $dir = lss_dir_for($lei, \$dst, 1);
 	my $f = "$dir/lei.saved-search";
 	if (-f $f && -r _) {
 		$self->{-cfg} = PublicInbox::Config->git_config_dump($f);
diff --git a/t/lei-q-save.t b/t/lei-q-save.t
index 26ea5cb8..170f7ce5 100644
--- a/t/lei-q-save.t
+++ b/t/lei-q-save.t
@@ -158,5 +158,11 @@ test_lei(sub {
 	lei_ok('up', $o);
 	@m = glob("$o/cur/*");
 	is(scalar(@m), 2, 'got 2nd result due to different OID');
+
+	SKIP: {
+		symlink($o, "$home/ln -s") or
+			skip "symlinks not supported in $home?: $!", 1;
+		lei_ok('up', "$home/ln -s");
+	};
 });
 done_testing;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-04-23  1:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-23  1:45 [PATCH 0/2] "lei up" surprise reduction fixes Eric Wong
2021-04-23  1:45 ` [PATCH 1/2] lei: saved searches support --dedupe=<mid|oid> Eric Wong
2021-04-23  1:45 ` [PATCH 2/2] lei up: support symlinked pathnames Eric Wong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).