* [PATCH 1/3] imap_tracker: prepare for use with lei
2021-04-22 9:08 [PATCH 0/3] lei import: network sync things Eric Wong
@ 2021-04-22 9:08 ` Eric Wong
2021-04-22 9:08 ` [PATCH 2/3] lei import: --incremental default for NNTP and IMAP Eric Wong
2021-04-22 9:08 ` [PATCH 3/3] lei import|convert: drop --no-kw aliases Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-04-22 9:08 UTC (permalink / raw)
To: meta
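Callers may now pass an explicit $dbname instead of relying on
the default imap.sqlite3 location. We'll also support nodatacow
on newly-created DBs, as we do in other SQLite DBs.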
---
lib/PublicInbox/IMAPTracker.pm | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/IMAPTracker.pm b/lib/PublicInbox/IMAPTracker.pm
index 6d4fb227..bcf7af2e 100644
--- a/lib/PublicInbox/IMAPTracker.pm
+++ b/lib/PublicInbox/IMAPTracker.pm
@@ -62,21 +62,27 @@ VALUES (?, ?, ?)
}
sub new {
- my ($class, $url) = @_;
+ my ($class, $url, $dbname) = @_;
- # original name for compatibility with old setups:
- my $dbname = PublicInbox::Config->config_dir() . "/imap.sqlite3";
+ unless (defined($dbname)) {
+ # original name for compatibility with old setups:
+ $dbname = PublicInbox::Config->config_dir() . '/imap.sqlite3';
- # use the new XDG-compliant name for new setups:
- if (!-f $dbname) {
- $dbname = ($ENV{XDG_DATA_HOME} //
- (($ENV{HOME} // '/nonexistent').'/.local/share')) .
- '/public-inbox/imap.sqlite3';
+ # use the new XDG-compliant name for new setups:
+ if (!-f $dbname) {
+ $dbname = ($ENV{XDG_DATA_HOME} //
+ (($ENV{HOME} // '/nonexistent').
+ '/.local/share')) .
+ '/public-inbox/imap.sqlite3';
+ }
}
if (!-f $dbname) {
require File::Path;
require File::Basename;
+ require PublicInbox::Spawn;
File::Path::mkpath(File::Basename::dirname($dbname));
+ open my $fh, '+>>', $dbname or die "failed to open $dbname: $!";
+ PublicInbox::Spawn::nodatacow_fd(fileno($fh));
}
my $self = bless { lock_path => "$dbname.lock", url => $url }, $class;
$self->lock_acquire;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] lei import: --incremental default for NNTP and IMAP
2021-04-22 9:08 [PATCH 0/3] lei import: network sync things Eric Wong
2021-04-22 9:08 ` [PATCH 1/3] imap_tracker: prepare for use with lei Eric Wong
@ 2021-04-22 9:08 ` Eric Wong
2021-04-22 9:08 ` [PATCH 3/3] lei import|convert: drop --no-kw aliases Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-04-22 9:08 UTC (permalink / raw)
To: meta
No point in burning through bandwidth to import stuff we already
saw. All this logic is shared with -watch but uses a different
pathname for lei since it's tied to lei/store (and not a
public-inbox).
---
Documentation/lei-store-format.pod | 1 +
lib/PublicInbox/LEI.pm | 4 +++-
lib/PublicInbox/LeiImport.pm | 5 +++++
lib/PublicInbox/NetReader.pm | 13 +++++++++----
t/lei-import-imap.t | 3 +++
t/lei-import-nntp.t | 3 +++
6 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/Documentation/lei-store-format.pod b/Documentation/lei-store-format.pod
index a42c770e..3e1ddc65 100644
--- a/Documentation/lei-store-format.pod
+++ b/Documentation/lei-store-format.pod
@@ -32,6 +32,7 @@ prevent them from being accidentally treated as a v2 inbox.
~/.local/share/lei/store
- ipc.lock # lock file for internal lei IPC
- local/$EPOCH.git # normal bare git repositories
+ - net_last.sqlite3 # import state for IMAP & NNTP
Additionally, the following share the same roles they do in extindex:
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 2e1aa246..d9e644eb 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -193,7 +193,8 @@ our %CMD = ( # sorted in order of importance/use:
'import' => [ 'LOCATION...|--stdin',
'one-time import/update from URL or filesystem',
qw(stdin| offset=i recursive|r exclude=s include|I=s
- lock=s@ in-format|F=s kw|keywords|flags! verbose|v+), @c_opt ],
+ lock=s@ in-format|F=s kw|keywords|flags! verbose|v+
+ incremental!), @c_opt ],
'convert' => [ 'LOCATION...|--stdin',
'one-time conversion from URL or filesystem to another format',
qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s
@@ -244,6 +245,7 @@ my %OPTDESC = (
'lock=s@' => [ 'METHOD|dotlock|fcntl|flock|none',
'mbox(5) locking method(s) to use (default: fcntl,dotlock)' ],
+'incremental! import' => 'import already seen IMAP and NNTP articles',
'globoff|g' => "do not match locations using '*?' wildcards ".
"and\xa0'[]'\x{a0}ranges",
'verbose|v+' => 'be more verbose',
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 16271603..accf08f5 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -58,6 +58,11 @@ sub lei_import { # the main "lei import" method
my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1;
if (my $net = $lei->{net}) {
# $j = $net->net_concurrency($j); TODO
+ if ($lei->{opt}->{incremental} // 1) {
+ $net->{incremental} = 1;
+ $net->{itrk_fn} = $lei->store_path .
+ '/net_last.sqlite3';
+ }
} else {
my $nproc = $self->detect_nproc;
$j = $nproc if $j > $nproc;
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 0ef66fd8..c7b43f01 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -373,6 +373,13 @@ sub run_commit_cb ($) {
$cb->(@args);
}
+sub _itrk ($$) {
+ my ($self, $uri) = @_;
+ return unless $self->{incremental};
+ # itrk_fn is set by lei
+ PublicInbox::IMAPTracker->new($$uri, $self->{itrk_fn});
+}
+
sub _imap_fetch_all ($$$) {
my ($self, $mic, $uri) = @_;
my $sec = uri_section($uri);
@@ -389,8 +396,7 @@ sub _imap_fetch_all ($$$) {
return "E: $uri cannot get UIDVALIDITY";
$r_uidnext //= $mic->uidnext($mbx) //
return "E: $uri cannot get UIDNEXT";
- my $itrk = $self->{incremental} ?
- PublicInbox::IMAPTracker->new($$uri) : 0;
+ my $itrk = _itrk($self, $uri);
my ($l_uidval, $l_uid) = $itrk ? $itrk->get_last : ();
$l_uidval //= $r_uidval; # first time
$l_uid //= 0;
@@ -543,8 +549,7 @@ sub _nntp_fetch_all ($$$) {
# IMAPTracker is also used for tracking NNTP, UID == article number
# LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's
# expensive. So we assume newsgroups don't change:
- my $itrk = $self->{incremental} ?
- PublicInbox::IMAPTracker->new($$uri) : 0;
+ my $itrk = _itrk($self, $uri);
my (undef, $l_art) = $itrk ? $itrk->get_last : ();
# allow users to specify articles to refetch
diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t
index 7e4d44b9..490ea9be 100644
--- a/t/lei-import-imap.t
+++ b/t/lei-import-imap.t
@@ -24,5 +24,8 @@ test_lei({ tmpdir => $tmpdir }, sub {
for (@$out) { $r{ref($_)}++ }
is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes');
lei_ok([qw(tag +kw:seen), "imap://$host_port/t.v2.0"], undef, undef);
+
+ my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3";
+ ok(-s $f, 'net tracked for redundant imports');
});
done_testing;
diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t
index 1fc6dbad..d795a86a 100644
--- a/t/lei-import-nntp.t
+++ b/t/lei-import-nntp.t
@@ -26,5 +26,8 @@ test_lei({ tmpdir => $tmpdir }, sub {
my %r;
for (@$out) { $r{ref($_)}++ }
is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes');
+
+ my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3";
+ ok(-s $f, 'net tracked for redundant imports');
});
done_testing;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] lei import|convert: drop --no-kw aliases
2021-04-22 9:08 [PATCH 0/3] lei import: network sync things Eric Wong
2021-04-22 9:08 ` [PATCH 1/3] imap_tracker: prepare for use with lei Eric Wong
2021-04-22 9:08 ` [PATCH 2/3] lei import: --incremental default for NNTP and IMAP Eric Wong
@ 2021-04-22 9:08 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-04-22 9:08 UTC (permalink / raw)
To: meta
Supporting --no-keywords and --no-flags aliases is harmful
if users end up assuming "keywords:" and "flags:" are valid
search prefixes (they're not).
---
Documentation/lei-import.pod | 2 +-
lib/PublicInbox/LEI.pm | 9 ++++-----
t/lei.t | 3 +--
3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/Documentation/lei-import.pod b/Documentation/lei-import.pod
index acc4f776..7d70191d 100644
--- a/Documentation/lei-import.pod
+++ b/Documentation/lei-import.pod
@@ -40,7 +40,7 @@ C<none>.
Default: fcntl,dotlock
-=item --no-kw, --no-keywords, --no-flags
+=item --no-kw
Don't import message keywords (or "flags" in IMAP terminology).
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index d9e644eb..9f49fc03 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -181,7 +181,7 @@ our %CMD = ( # sorted in order of importance/use:
qw(exact! all jobs:i indexed), @c_opt ],
'add-watch' => [ 'LOCATION', 'watch for new messages and flag changes',
- qw(import! kw|keywords|flags! interval=s recursive|r
+ qw(import! kw! interval=s recursive|r
exclude=s include=s), @c_opt ],
'ls-watch' => [ '[FILTER...]', 'list active watches with numbers and status',
qw(format|f=s z), @c_opt ],
@@ -193,12 +193,11 @@ our %CMD = ( # sorted in order of importance/use:
'import' => [ 'LOCATION...|--stdin',
'one-time import/update from URL or filesystem',
qw(stdin| offset=i recursive|r exclude=s include|I=s
- lock=s@ in-format|F=s kw|keywords|flags! verbose|v+
- incremental!), @c_opt ],
+ lock=s@ in-format|F=s kw! verbose|v+ incremental!), @c_opt ],
'convert' => [ 'LOCATION...|--stdin',
'one-time conversion from URL or filesystem to another format',
qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s
- lock=s@ kw|keywords|flags!), @c_opt ],
+ lock=s@ kw!), @c_opt ],
'p2q' => [ 'FILE|COMMIT_OID|--stdin',
"use a patch to generate a query for `lei q --stdin'",
qw(stdin| want|w=s@ uri debug), @c_opt ],
@@ -350,7 +349,7 @@ my %OPTDESC = (
'by-mid|mid:s' => [ 'MID', 'match only by Message-ID, ignoring contents' ],
-'kw|keywords|flags!' => 'disable/enable importing flags',
+'kw!' => 'disable/enable importing keywords (aka "flags")',
# xargs, env, use "-0", git(1) uses "-z". We support z|0 everywhere
'z|0' => 'use NUL \\0 instead of newline (CR) to delimit lines',
diff --git a/t/lei.t b/t/lei.t
index 6ade2f18..6d276050 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -131,8 +131,7 @@ my $test_completion = sub {
}
lei_ok(qw(_complete lei import));
%out = map { $_ => 1 } split(/\s+/s, $lei_out);
- for my $sw (qw(--flags --no-flags --no-kw --kw --no-keywords
- --keywords)) {
+ for my $sw (qw(--no-kw --kw)) {
ok($out{$sw}, "$sw offered as `lei import' completion");
}
};
^ permalink raw reply related [flat|nested] 4+ messages in thread