From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4BAAF1F9FF for ; Wed, 24 Feb 2021 11:31:55 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/4] lei : support NNTP sources Date: Wed, 24 Feb 2021 17:31:52 +0600 Message-Id: <20210224113154.686-3-e@80x24.org> In-Reply-To: <20210224113154.686-1-e@80x24.org> References: <20210224113154.686-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We can read NNTP in -watch and Net::NNTP is shipped with Perl5, so lei import and convert have no excuse not to support NNTP as a client. Authentication is not tested, yet; but should be close to what IMAP is like... --- MANIFEST | 2 +- lib/PublicInbox/LeiAuth.pm | 4 +- lib/PublicInbox/LeiConvert.pm | 14 ++- lib/PublicInbox/LeiImport.pm | 12 +- lib/PublicInbox/NetReader.pm | 209 ++++++++++++++++++++++++++------ lib/PublicInbox/Watch.pm | 218 +++++++++++++--------------------- t/lei-convert.t | 31 +++-- t/lei-import-nntp.t | 30 +++++ t/watch_nntp.t | 17 --- 9 files changed, 331 insertions(+), 206 deletions(-) create mode 100644 t/lei-import-nntp.t delete mode 100644 t/watch_nntp.t diff --git a/MANIFEST b/MANIFEST index 9cf97563..4c04eec8 100644 --- a/MANIFEST +++ b/MANIFEST @@ -369,6 +369,7 @@ t/lei-daemon.t t/lei-externals.t t/lei-import-imap.t t/lei-import-maildir.t +t/lei-import-nntp.t t/lei-import.t t/lei-mirror.t t/lei.t @@ -454,7 +455,6 @@ t/watch_imap.t t/watch_maildir.t t/watch_maildir_v2.t t/watch_multiple_headers.t -t/watch_nntp.t t/www_altid.t t/www_listing.t t/www_static.t diff --git a/lib/PublicInbox/LeiAuth.pm b/lib/PublicInbox/LeiAuth.pm index 099bdaca..927fe550 100644 --- a/lib/PublicInbox/LeiAuth.pm +++ b/lib/PublicInbox/LeiAuth.pm @@ -14,9 +14,11 @@ sub do_auth_atfork { # used by IPC WQ workers my $lei = $wq->{lei}; my $net = $lei->{net}; my $mics = $net->imap_common_init($lei); + my $nn = $net->nntp_common_init($lei); pkt_do($lei->{pkt_op_p}, 'net_merge', $net) or die "pkt_do net_merge: $!"; - $net->{mics_cached} = $mics; + $net->{mics_cached} = $mics if $mics; + $net->{nn_cached} = $nn if $nn; } sub net_merge_done1 { # bump merge-count in top-level lei-daemon diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm index 4839dea4..a7e47871 100644 --- a/lib/PublicInbox/LeiConvert.pm +++ b/lib/PublicInbox/LeiConvert.pm @@ -18,8 +18,8 @@ sub mbox_cb { $self->{wcb}->(undef, { kw => \@kw }, $eml); } -sub imap_cb { # ->imap_each - my ($url, $uid, $kw, $eml, $self) = @_; +sub net_cb { # callback for ->imap_each, ->nntp_each + my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored $self->{wcb}->(undef, { kw => $kw }, $eml); } @@ -35,14 +35,18 @@ sub do_convert { # via wq_do my $mics; if (my $nrd = $lei->{nrd}) { # may prompt user once $nrd->{mics_cached} = $nrd->imap_common_init($lei); + $nrd->{nn_cached} = $nrd->nntp_common_init($lei); } if (my $stdin = delete $self->{0}) { PublicInbox::MboxReader->$in_fmt($stdin, \&mbox_cb, $self); } for my $input (@{$self->{inputs}}) { my $ifmt = lc($in_fmt // ''); - if ($input =~ m!\A(?:imap|nntp)s?://!) { # TODO: nntp - $lei->{nrd}->imap_each($input, \&imap_cb, $self); + if ($input =~ m!\Aimaps?://!) { + $lei->{nrd}->imap_each($input, \&net_cb, $self); + next; + } elsif ($input =~ m!\A(?:nntps?|s?news)://!) { + $lei->{nrd}->nntp_each($input, \&net_cb, $self); next; } elsif ($input =~ s!\A([a-z0-9]+):!!i) { $ifmt = lc $1; @@ -82,7 +86,7 @@ sub call { # the main "lei convert" method # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX for my $input (@inputs) { my $input_path = $input; - if ($input =~ m!\A(?:imap|nntp)s?://!i) { + if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) { require PublicInbox::NetReader; $nrd //= PublicInbox::NetReader->new; $nrd->add_url($input); diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index b85f4d6c..cbfb3127 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -74,7 +74,7 @@ sub call { # the main "lei import" method # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX for my $input (@inputs) { my $input_path = $input; - if ($input =~ m!\A(?:imap|nntp)s?://!i) { + if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) { require PublicInbox::NetReader; $net //= PublicInbox::NetReader->new; $net->add_url($input); @@ -152,9 +152,8 @@ sub _import_maildir { # maildir_each_file cb $sto->ipc_do('set_eml_from_maildir', $f, $set_kw); } -sub _import_imap { # imap_each cb +sub _import_net { # imap_each, nntp_each cb my ($url, $uid, $kw, $eml, $sto, $set_kw) = @_; - warn "$url $uid"; $sto->ipc_do('set_eml', $eml, $set_kw ? @$kw : ()); } @@ -163,10 +162,13 @@ sub import_path_url { my $lei = $self->{lei}; my $ifmt = lc($lei->{opt}->{'format'} // ''); # TODO auto-detect? - if ($input =~ m!\A(imap|nntp)s?://!i) { - $lei->{net}->imap_each($input, \&_import_imap, $lei->{sto}, + if ($input =~ m!\Aimaps?://!i) { + $lei->{net}->imap_each($input, \&_import_net, $lei->{sto}, $lei->{opt}->{kw}); return; + } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) { + $lei->{net}->nntp_each($input, \&_import_net, $lei->{sto}, 0); + return; } elsif ($input =~ s!\A([a-z0-9]+):!!i) { $ifmt = lc $1; } diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm index ff90468b..2a453217 100644 --- a/lib/PublicInbox/NetReader.pm +++ b/lib/PublicInbox/NetReader.pm @@ -11,26 +11,16 @@ use PublicInbox::Eml; our %IMAPflags2kw = map {; "\\\u$_" => $_ } qw(seen answered flagged draft); # TODO: trim this down, this is huge -our @EXPORT = qw(uri_new uri_scheme uri_section - nn_new nn_for - imap_uri nntp_url - cfg_bool cfg_intvl imap_common_init +our @EXPORT = qw(uri_new uri_section + nn_new imap_uri nntp_uri + cfg_bool cfg_intvl imap_common_init nntp_common_init ); -# avoid exposing deprecated "snews" to users. -my %SCHEME_MAP = ('snews' => 'nntps'); - -sub uri_scheme ($) { - my ($uri) = @_; - my $scheme = $uri->scheme; - $SCHEME_MAP{$scheme} // $scheme; -} - # returns the git config section name, e.g [imap "imaps://user@example.com"] # without the mailbox, so we can share connections between different inboxes sub uri_section ($) { my ($uri) = @_; - uri_scheme($uri) . '://' . $uri->authority; + $uri->scheme . '://' . $uri->authority; } sub auth_anon_cb { '' }; # for Mail::IMAPClient::Authcallback @@ -123,8 +113,8 @@ sub try_starttls ($) { } sub nn_new ($$$) { - my ($nn_arg, $nntp_opt, $url) = @_; - my $nn = Net::NNTP->new(%$nn_arg) or die "E: <$url> new: $!\n"; + my ($nn_arg, $nntp_opt, $uri) = @_; + my $nn = Net::NNTP->new(%$nn_arg) or die "E: <$uri> new: $!\n"; # default to using STARTTLS if it's available, but allow # it to be disabled for localhost/VPN users @@ -133,27 +123,26 @@ sub nn_new ($$$) { try_starttls($nn_arg->{Host})) { # soft fail by default $nn->starttls or warn <<""; -W: <$url> STARTTLS tried and failed (not requested) +W: <$uri> STARTTLS tried and failed (not requested) } elsif ($nntp_opt->{starttls}) { # hard fail if explicitly configured $nn->starttls or die <<""; -E: <$url> STARTTLS requested and failed +E: <$uri> STARTTLS requested and failed } } elsif ($nntp_opt->{starttls}) { $nn->can('starttls') or - die "E: <$url> Net::NNTP too old for STARTTLS\n"; + die "E: <$uri> Net::NNTP too old for STARTTLS\n"; $nn->starttls or die <<""; -E: <$url> STARTTLS requested and failed +E: <$uri> STARTTLS requested and failed } $nn; } sub nn_for ($$$;$) { # nn = Net::NNTP - my ($self, $url, $nn_args, $lei) = @_; - my $uri = uri_new($url); + my ($self, $uri, $nn_args, $lei) = @_; my $sec = uri_section($uri); my $nntp_opt = $self->{nntp_opt}->{$sec} //= {}; my $host = $uri->host; @@ -165,7 +154,7 @@ sub nn_for ($$$;$) { # nn = Net::NNTP require PublicInbox::GitCredential; $cred = bless { url => $sec, - protocol => uri_scheme($uri), + protocol => $uri->scheme, host => $host, }, 'PublicInbox::GitCredential'; ($u, $p) = split(/:/, $ui, 2); @@ -179,14 +168,13 @@ sub nn_for ($$$;$) { # nn = Net::NNTP SSL => $uri->secure, # snews == nntps %$common, # may Debug .... }; - my $nn = nn_new($nn_arg, $nntp_opt, $url); - + my $nn = nn_new($nn_arg, $nntp_opt, $uri); if ($cred) { $cred->fill($lei); # may prompt user here if ($nn->authinfo($u, $p)) { push @{$nntp_opt->{-postconn}}, [ 'authinfo', $u, $p ]; } else { - warn "E: <$url> AUTHINFO $u XXXX failed\n"; + warn "E: <$uri> AUTHINFO $u XXXX failed\n"; $nn = undef; } } @@ -197,12 +185,12 @@ sub nn_for ($$$;$) { # nn = Net::NNTP if ($nn->compress) { push @{$nntp_opt->{-postconn}}, [ 'compress' ]; } else { - warn "W: <$url> COMPRESS failed\n"; + warn "W: <$uri> COMPRESS failed\n"; } } else { delete $nntp_opt->{compress}; warn <<""; -W: <$url> COMPRESS not supported by Net::NNTP +W: <$uri> COMPRESS not supported by Net::NNTP W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates } @@ -220,15 +208,12 @@ sub imap_uri { $uri ? $uri->canonical : undef; } -my %IS_NNTP = (news => 1, snews => 1, nntp => 1); -sub nntp_url { +my %IS_NNTP = (news => 1, snews => 1, nntp => 1, nntps => 1); +sub nntp_uri { my ($url) = @_; - my $uri = uri_new($url); - return unless $uri && $IS_NNTP{$uri->scheme} && $uri->group; - $url = $uri->canonical->as_string; - # nntps is IANA registered, snews is deprecated - $url =~ s!\Asnews://!nntps://!; - $url; + require PublicInbox::URInntps; + my $uri = PublicInbox::URInntps->new($url); + $uri && $IS_NNTP{$uri->scheme} && $uri->group ? $uri->canonical : undef; } sub cfg_intvl ($$$) { @@ -254,6 +239,7 @@ sub cfg_bool ($$$) { # flesh out common IMAP-specific data structures sub imap_common_init ($;$) { my ($self, $lei) = @_; + return unless $self->{imap_order}; $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet}; eval { require PublicInbox::IMAPClient } or die "Mail::IMAPClient is required for IMAP:\n$@\n"; @@ -297,10 +283,55 @@ sub imap_common_init ($;$) { $mics; } +# flesh out common NNTP-specific data structures +sub nntp_common_init ($;$) { + my ($self, $lei) = @_; + return unless $self->{nntp_order}; + $self->{quiet} = 1 if $lei && $lei->{opt}->{quiet}; + eval { require Net::NNTP } or + die "Net::NNTP is required for NNTP:\n$@\n"; + eval { require PublicInbox::IMAPTracker } or + die "DBD::SQLite is required for NNTP\n:$@\n"; + my $cfg = $self->{pi_cfg} // $lei->_lei_cfg; + my $nn_args = {}; # scheme://authority => Net::NNTP->new arg + for my $uri (@{$self->{nntp_order}}) { + my $sec = uri_section($uri); + + # Debug and Timeout are passed to Net::NNTP->new + my $v = cfg_bool($cfg, 'nntp.Debug', $$uri); + $nn_args->{$sec}->{Debug} = $v if defined $v; + my $to = cfg_intvl($cfg, 'nntp.Timeout', $$uri); + $nn_args->{$sec}->{Timeout} = $to if $to; + + # Net::NNTP post-connect commands + for my $k (qw(starttls compress)) { + $v = cfg_bool($cfg, "nntp.$k", $$uri) // next; + $self->{nntp_opt}->{$sec}->{$k} = $v; + } + + # internal option + for my $k (qw(pollInterval)) { + $to = cfg_intvl($cfg, "nntp.$k", $$uri) // next; + $self->{nntp_opt}->{$sec}->{$k} = $to; + } + } + # make sure we can connect and cache the credentials in memory + $self->{nn_arg} = {}; # schema://authority => Net::NNTP->new args + my %nn; # schema://authority => Net::NNTP object + for my $uri (@{$self->{nntp_order}}) { + my $sec = uri_section($uri); + $nn{$sec} //= nn_for($self, $uri, $nn_args, $lei); + } + \%nn; # for optional {nn_cached} +} + sub add_url { my ($self, $arg) = @_; - if (my $uri = imap_uri($arg)) { + my $uri; + if ($uri = imap_uri($arg)) { push @{$self->{imap_order}}, $uri; + } elsif ($uri = nntp_uri($arg)) { + push @{$self->{nntp_order}}, $uri; } else { push @{$self->{unsupported_url}}, $arg; } @@ -315,6 +346,10 @@ sub errors { eval { require PublicInbox::IMAPClient } or die "Mail::IMAPClient is required for IMAP:\n$@\n"; } + if ($self->{nntp_order}) { + eval { require Net::NNTP } or + die "Net::NNTP is required for NNTP:\n$@\n"; + } undef; } @@ -461,6 +496,106 @@ sub imap_each { $mic; } +# may used cached auth info prepared by nn_for once +sub nn_get { + my ($self, $uri) = @_; + my $sec = uri_section($uri); + # see if caller saved result of nntp_common_init + my $cached = $self->{nn_cached} // {}; + my $nn; + $nn = delete($cached->{$sec}) and return $nn; + my $nn_arg = $self->{nn_arg}->{$sec} or + die "BUG: no Net::NNTP->new arg for $sec"; + my $nntp_opt = $self->{nntp_opt}->{$sec}; + $nn = nn_new($nn_arg, $nntp_opt, $uri) or return; + if (my $postconn = $nntp_opt->{-postconn}) { + for my $m_arg (@$postconn) { + my ($method, @args) = @$m_arg; + $nn->$method(@args) and next; + die "E: <$uri> $method failed\n"; + return; + } + } + $nn; +} + +sub _nntp_fetch_all ($$$) { + my ($self, $nn, $uri) = @_; + my ($group, $num_a, $num_b) = $uri->group; + my $sec = uri_section($uri); + my ($nr, $beg, $end) = $nn->group($group); + unless (defined($nr)) { + chomp(my $msg = $nn->message); + return "E: GROUP $group <$sec> $msg"; + } + + # IMAPTracker is also used for tracking NNTP, UID == article number + # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's + # expensive. So we assume newsgroups don't change: + my $itrk = $self->{incremental} ? + PublicInbox::IMAPTracker->new($$uri) : 0; + my (undef, $l_art) = $itrk ? $itrk->get_last : (); + + # allow users to specify articles to refetch + # cf. https://tools.ietf.org/id/draft-gilman-news-url-01.txt + # nntp://example.com/inbox.foo/$num_a-$num_b + $beg = $num_a if defined($num_a) && $num_a < $beg; + $end = $num_b if defined($num_b) && $num_b < $end; + if (defined $l_art) { + return if $l_art >= $end; # nothing to do + $beg = $l_art + 1; + } + my ($err, $art); + unless ($self->{quiet}) { + warn "# $uri fetching ARTICLE $beg..$end\n"; + } + my $last_art; + my $n = $self->{max_batch}; + for ($beg..$end) { + last if $self->{quit}; + $art = $_; + if (--$n < 0) { + $itrk->update_last(0, $last_art) if $itrk; + $n = $self->{max_batch}; + } + my $raw = $nn->article($art); + unless (defined($raw)) { + my $msg = $nn->message; + if ($nn->code == 421) { # pseudo response from Net::Cmd + $err = "E: $msg"; + last; + } else { # probably just a deleted message (spam) + warn "W: $msg"; + next; + } + } + $raw = join('', @$raw); + $raw =~ s/\r\n/\n/sg; + my ($eml_cb, @args) = @{$self->{eml_each}}; + $eml_cb->($uri, $art, [], PublicInbox::Eml->new(\$raw), @args); + $last_art = $art; + } + $itrk->update_last(0, $last_art) if $itrk; + $err; +} + +sub nntp_each { + my ($self, $url, $eml_cb, @args) = @_; + my $uri = ref($url) ? $url : PublicInbox::URInntps->new($url); + my $sec = uri_section($uri); + local $0 = $uri->group ." $sec"; + my $nn = nn_get($self, $uri); + my $err; + if ($nn) { + local $self->{eml_each} = [ $eml_cb, @args ]; + $err = _nntp_fetch_all($self, $nn, $uri); + } else { + $err = "E: not connected: $!"; + } + warn $err if $err; + $nn; +} + sub new { bless {}, shift }; 1; diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 8d13ea35..4b009a28 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -56,16 +56,16 @@ sub new { defined(my $dirs = $cfg->{$k}) or next; $dirs = PublicInbox::Config::_array($dirs); for my $dir (@$dirs) { - my $url; + my $uri; if (is_maildir($dir)) { # skip "new", no MUA has seen it, yet. $mdmap{"$dir/cur"} = 'watchspam'; - } elsif (my $uri = imap_uri($dir)) { + } elsif ($uri = imap_uri($dir)) { $imap{$$uri} = 'watchspam'; push @imap, $uri; - } elsif ($url = nntp_url($dir)) { - $nntp{$url} = 'watchspam'; - push @nntp, $url; + } elsif ($uri = nntp_uri($dir)) { + $nntp{$$uri} = 'watchspam'; + push @nntp, $uri; } else { warn "unsupported $k=$dir\n"; } @@ -84,7 +84,7 @@ sub new { my $watches = $ibx->{watch} or return; $watches = PublicInbox::Config::_array($watches); for my $watch (@$watches) { - my $url; + my $uri; if (is_maildir($watch)) { compile_watchheaders($ibx); my ($new, $cur) = ("$watch/new", "$watch/cur"); @@ -92,17 +92,16 @@ sub new { return if is_watchspam($cur, $cur_dst, $ibx); push @{$mdmap{$new} //= []}, $ibx; push @$cur_dst, $ibx; - } elsif (my $uri = imap_uri($watch)) { - my $url = $$uri; - return if is_watchspam($url, $imap{$url}, $ibx); + } elsif ($uri = imap_uri($watch)) { + my $cur_dst = $imap{$$uri} //= []; + return if is_watchspam($uri, $cur_dst, $ibx); compile_watchheaders($ibx); - my $n = push @{$imap{$url} ||= []}, $ibx; - push @imap, $uri if $n == 1; - } elsif ($url = nntp_url($watch)) { - return if is_watchspam($url, $nntp{$url}, $ibx); + push(@imap, $uri) if 1 == push(@$cur_dst, $ibx); + } elsif ($uri = nntp_uri($watch)) { + my $cur_dst = $nntp{$$uri} //= []; + return if is_watchspam($uri, $cur_dst, $ibx); compile_watchheaders($ibx); - my $n = push @{$nntp{$url} ||= []}, $ibx; - push @nntp, $url if $n == 1; + push(@nntp, $uri) if 1 == push(@$cur_dst, $ibx); } else { warn "watch unsupported: $k=$watch\n"; } @@ -289,11 +288,11 @@ sub watch_fs_init ($) { } sub imap_import_msg ($$$$$) { - my ($self, $url, $uid, $raw, $flags) = @_; + my ($self, $uri, $uid, $raw, $flags) = @_; # our target audience expects LF-only, save storage $$raw =~ s/\r\n/\n/sg; - my $inboxes = $self->{imap}->{$url}; + my $inboxes = $self->{imap}->{$$uri}; if (ref($inboxes)) { for my $ibx (@$inboxes) { my $eml = PublicInbox::Eml->new($$raw); @@ -304,15 +303,14 @@ sub imap_import_msg ($$$$$) { local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); my $eml = PublicInbox::Eml->new($raw); $self->{pi_cfg}->each_inbox(\&remove_eml_i, - $self, $eml, "$url UID:$uid"); + $self, $eml, "$uri UID:$uid"); } else { die "BUG: destination unknown $inboxes"; } } sub imap_fetch_all ($$$) { - my ($self, $mic, $url) = @_; - my $uri = PublicInbox::URIimap->new($url); + my ($self, $mic, $uri) = @_; my $sec = uri_section($uri); my $mbx = $uri->mailbox; $mic->Clear(1); # trim results history @@ -324,25 +322,25 @@ sub imap_fetch_all ($$$) { last if $r_uidval && $r_uidnext; } $r_uidval //= $mic->uidvalidity($mbx) // - return "E: $url cannot get UIDVALIDITY"; + return "E: $uri cannot get UIDVALIDITY"; $r_uidnext //= $mic->uidnext($mbx) // - return "E: $url cannot get UIDNEXT"; - my $itrk = PublicInbox::IMAPTracker->new($url); + return "E: $uri cannot get UIDNEXT"; + my $itrk = PublicInbox::IMAPTracker->new($$uri); my ($l_uidval, $l_uid) = $itrk->get_last; $l_uidval //= $r_uidval; # first time $l_uid //= 1; if ($l_uidval != $r_uidval) { - return "E: $url UIDVALIDITY mismatch\n". + return "E: $uri UIDVALIDITY mismatch\n". "E: local=$l_uidval != remote=$r_uidval"; } my $r_uid = $r_uidnext - 1; if ($l_uid != 1 && $l_uid > $r_uid) { - return "E: $url local UID exceeds remote ($l_uid > $r_uid)\n". - "E: $url strangely, UIDVALIDLITY matches ($l_uidval)\n"; + return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n". + "E: $uri strangely, UIDVALIDLITY matches ($l_uidval)\n"; } return if $l_uid >= $r_uid; # nothing to do - warn "I: $url fetching UID $l_uid:$r_uid\n"; + warn "I: $uri fetching UID $l_uid:$r_uid\n"; $mic->Uid(1); # the default, we hope my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1; my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK'; @@ -355,7 +353,7 @@ sub imap_fetch_all ($$$) { local $SIG{__WARN__} = sub { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; $batch //= '?'; - $warn_cb->("$pfx$url UID:$batch\n", @_); + $warn_cb->("$pfx$uri UID:$batch\n", @_); }; my $err; do { @@ -363,7 +361,7 @@ sub imap_fetch_all ($$$) { # 1) servers do not need to return results in any order # 2) Mail::IMAPClient doesn't offer a streaming API $uids = $mic->search("UID $l_uid:*") or - return "E: $url UID SEARCH $l_uid:* error: $!"; + return "E: $uri UID SEARCH $l_uid:* error: $!"; return if scalar(@$uids) == 0; # RFC 3501 doesn't seem to indicate order of UID SEARCH @@ -389,7 +387,7 @@ sub imap_fetch_all ($$$) { local $0 = "UID:$batch $mbx $sec"; my $r = $mic->fetch_hash($batch, $req, 'FLAGS'); unless ($r) { # network error? - $err = "E: $url UID FETCH $batch error: $!"; + $err = "E: $uri UID FETCH $batch error: $!"; last; } for my $uid (@batch) { @@ -397,7 +395,7 @@ sub imap_fetch_all ($$$) { my $per_uid = delete $r->{$uid} // next; my $raw = delete($per_uid->{$key}) // next; my $fl = $per_uid->{FLAGS} // ''; - imap_import_msg($self, $url, $uid, \$raw, $fl); + imap_import_msg($self, $uri, $uid, \$raw, $fl); $last_uid = $uid; last if $self->{quit}; } @@ -410,14 +408,14 @@ sub imap_fetch_all ($$$) { } sub imap_idle_once ($$$$) { - my ($self, $mic, $intvl, $url) = @_; + my ($self, $mic, $intvl, $uri) = @_; my $i = $intvl //= (29 * 60); my $end = now() + $intvl; - warn "I: $url idling for ${intvl}s\n"; + warn "I: $uri idling for ${intvl}s\n"; local $0 = "IDLE $0"; unless ($mic->idle) { return if $self->{quit}; - return "E: IDLE failed on $url: $!"; + return "E: IDLE failed on $uri: $!"; } $self->{idle_mic} = $mic; # for ->quit my @res; @@ -428,16 +426,15 @@ sub imap_idle_once ($$$$) { } delete $self->{idle_mic}; unless ($self->{quit}) { - $mic->IsConnected or return "E: IDLE disconnected on $url"; - $mic->done or return "E: IDLE DONE failed on $url: $!"; + $mic->IsConnected or return "E: IDLE disconnected on $uri"; + $mic->done or return "E: IDLE DONE failed on $uri: $!"; } undef; } # idles on a single URI sub watch_imap_idle_1 ($$$) { - my ($self, $url, $intvl) = @_; - my $uri = PublicInbox::URIimap->new($url); + my ($self, $uri, $intvl) = @_; my $sec = uri_section($uri); my $mic_arg = $self->{mic_arg}->{$sec} or die "BUG: no Mail::IMAPClient->new arg for $sec"; @@ -447,8 +444,8 @@ sub watch_imap_idle_1 ($$$) { $mic //= PublicInbox::IMAPClient->new(%$mic_arg); my $err; if ($mic && $mic->IsConnected) { - $err = imap_fetch_all($self, $mic, $url); - $err //= imap_idle_once($self, $mic, $intvl, $url); + $err = imap_fetch_all($self, $mic, $uri); + $err //= imap_idle_once($self, $mic, $intvl, $uri); } else { $err = "E: not connected: $!"; } @@ -477,21 +474,21 @@ sub watch_atfork_parent ($) { } sub imap_idle_requeue { # DS::add_timer callback - my ($self, $url_intvl) = @_; + my ($self, $uri_intvl) = @_; return if $self->{quit}; - push @{$self->{idle_todo}}, $url_intvl; + push @{$self->{idle_todo}}, $uri_intvl; event_step($self); } sub imap_idle_reap { # PublicInbox::DS::dwaitpid callback my ($self, $pid) = @_; - my $url_intvl = delete $self->{idle_pids}->{$pid} or + my $uri_intvl = delete $self->{idle_pids}->{$pid} or die "BUG: PID=$pid (unknown) reaped: \$?=$?\n"; - my ($url, $intvl) = @$url_intvl; + my ($uri, $intvl) = @$uri_intvl; return if $self->{quit}; - warn "W: PID=$pid on $url died: \$?=$?\n" if $?; - add_timer(60, \&imap_idle_requeue, $self, $url_intvl); + warn "W: PID=$pid on $uri died: \$?=$?\n" if $?; + add_timer(60, \&imap_idle_requeue, $self, $uri_intvl); } sub reap { # callback for EOFpipe @@ -505,8 +502,8 @@ sub reap { # callback for EOFpipe } sub imap_idle_fork ($$) { - my ($self, $url_intvl) = @_; - my ($url, $intvl) = @$url_intvl; + my ($self, $uri_intvl) = @_; + my ($uri, $intvl) = @$uri_intvl; pipe(my ($r, $w)) or die "pipe: $!"; my $seed = rand(0xffffffff); my $pid = fork // die "fork: $!"; @@ -515,11 +512,11 @@ sub imap_idle_fork ($$) { eval { Net::SSLeay::randomize() }; close $r; watch_atfork_child($self); - watch_imap_idle_1($self, $url, $intvl); + watch_imap_idle_1($self, $uri, $intvl); close $w; _exit(0); } - $self->{idle_pids}->{$pid} = $url_intvl; + $self->{idle_pids}->{$pid} = $uri_intvl; PublicInbox::EOFpipe->new($r, \&reap, [$pid, \&imap_idle_reap, $self]); } @@ -530,8 +527,8 @@ sub event_step { if ($idle_todo && @$idle_todo) { my $oldset = watch_atfork_parent($self); eval { - while (my $url_intvl = shift(@$idle_todo)) { - imap_idle_fork($self, $url_intvl); + while (my $uri_intvl = shift(@$idle_todo)) { + imap_idle_fork($self, $uri_intvl); } }; PublicInbox::DS::sig_setmask($oldset); @@ -541,30 +538,28 @@ sub event_step { } sub watch_imap_fetch_all ($$) { - my ($self, $urls) = @_; - for my $url (@$urls) { - my $uri = PublicInbox::URIimap->new($url); + my ($self, $uris) = @_; + for my $uri (@$uris) { my $sec = uri_section($uri); my $mic_arg = $self->{mic_arg}->{$sec} or die "BUG: no Mail::IMAPClient->new arg for $sec"; my $mic = PublicInbox::IMAPClient->new(%$mic_arg) or next; - my $err = imap_fetch_all($self, $mic, $url); + my $err = imap_fetch_all($self, $mic, $uri); last if $self->{quit}; warn $err, "\n" if $err; } } sub watch_nntp_fetch_all ($$) { - my ($self, $urls) = @_; - for my $url (@$urls) { - my $uri = uri_new($url); + my ($self, $uris) = @_; + for my $uri (@$uris) { my $sec = uri_section($uri); my $nn_arg = $self->{nn_arg}->{$sec} or die "BUG: no Net::NNTP->new arg for $sec"; my $nntp_opt = $self->{nntp_opt}->{$sec}; - my $nn = nn_new($nn_arg, $nntp_opt, $url); + my $nn = nn_new($nn_arg, $nntp_opt, $uri); unless ($nn) { - warn "E: $url: \$!=$!\n"; + warn "E: $uri: \$!=$!\n"; next; } last if $self->{quit}; @@ -572,21 +567,21 @@ sub watch_nntp_fetch_all ($$) { for my $m_arg (@$postconn) { my ($method, @args) = @$m_arg; $nn->$method(@args) and next; - warn "E: <$url> $method failed\n"; + warn "E: <$uri> $method failed\n"; $nn = undef; last; } } last if $self->{quit}; if ($nn) { - my $err = nntp_fetch_all($self, $nn, $url); + my $err = nntp_fetch_all($self, $nn, $uri); warn $err, "\n" if $err; } } } sub poll_fetch_fork { # DS::add_timer callback - my ($self, $intvl, $urls) = @_; + my ($self, $intvl, $uris) = @_; return if $self->{quit}; pipe(my ($r, $w)) or die "pipe: $!"; my $oldset = watch_atfork_parent($self); @@ -597,47 +592,46 @@ sub poll_fetch_fork { # DS::add_timer callback eval { Net::SSLeay::randomize() }; close $r; watch_atfork_child($self); - if ($urls->[0] =~ m!\Aimaps?://!i) { - watch_imap_fetch_all($self, $urls); + if ($uris->[0]->scheme =~ m!\Aimaps?!i) { + watch_imap_fetch_all($self, $uris); } else { - watch_nntp_fetch_all($self, $urls); + watch_nntp_fetch_all($self, $uris); } close $w; _exit(0); } PublicInbox::DS::sig_setmask($oldset); die "fork: $!" unless defined $pid; - $self->{poll_pids}->{$pid} = [ $intvl, $urls ]; + $self->{poll_pids}->{$pid} = [ $intvl, $uris ]; PublicInbox::EOFpipe->new($r, \&reap, [$pid, \&poll_fetch_reap, $self]); } sub poll_fetch_reap { my ($self, $pid) = @_; - my $intvl_urls = delete $self->{poll_pids}->{$pid} or + my $intvl_uris = delete $self->{poll_pids}->{$pid} or die "BUG: PID=$pid (unknown) reaped: \$?=$?\n"; return if $self->{quit}; - my ($intvl, $urls) = @$intvl_urls; + my ($intvl, $uris) = @$intvl_uris; if ($?) { - warn "W: PID=$pid died: \$?=$?\n", map { "$_\n" } @$urls; + warn "W: PID=$pid died: \$?=$?\n", map { "$_\n" } @$uris; } - warn("I: will check $_ in ${intvl}s\n") for @$urls; - add_timer($intvl, \&poll_fetch_fork, $self, $intvl, $urls); + warn("I: will check $_ in ${intvl}s\n") for @$uris; + add_timer($intvl, \&poll_fetch_fork, $self, $intvl, $uris); } sub watch_imap_init ($$) { my ($self, $poll) = @_; my $mics = imap_common_init($self); # read args from config - my $idle = []; # [ [ url1, intvl1 ], [url2, intvl2] ] - for my $url (keys %{$self->{imap}}) { - my $uri = PublicInbox::URIimap->new($url); + my $idle = []; # [ [ uri1, intvl1 ], [uri2, intvl2] ] + for my $uri (@{$self->{imap_order}}) { my $sec = uri_section($uri); my $mic = $mics->{$sec}; my $intvl = $self->{imap_opt}->{$sec}->{pollInterval}; if ($mic->has_capability('IDLE') && !$intvl) { $intvl = $self->{imap_opt}->{$sec}->{idleInterval}; - push @$idle, [ $url, $intvl // () ]; + push @$idle, [ $uri, $intvl // () ]; } else { - push @{$poll->{$intvl || 120}}, $url; + push @{$poll->{$intvl || 120}}, $uri; } } if (scalar @$idle) { @@ -646,38 +640,8 @@ sub watch_imap_init ($$) { } } -# flesh out common NNTP-specific data structures -sub nntp_common_init ($) { - my ($self) = @_; - my $cfg = $self->{pi_cfg}; - my $nn_args = {}; # scheme://authority => Net::NNTP->new arg - for my $url (@{$self->{nntp_order}}) { - my $sec = uri_section(uri_new($url)); - - # Debug and Timeout are passed to Net::NNTP->new - my $v = cfg_bool($cfg, 'nntp.Debug', $url); - $nn_args->{$sec}->{Debug} = $v if defined $v; - my $to = cfg_intvl($cfg, 'nntp.Timeout', $url); - $nn_args->{$sec}->{Timeout} = $to if $to; - - # Net::NNTP post-connect commands - for my $k (qw(starttls compress)) { - $v = cfg_bool($cfg, "nntp.$k", $url) // next; - $self->{nntp_opt}->{$sec}->{$k} = $v; - } - - # internal option - for my $k (qw(pollInterval)) { - $to = cfg_intvl($cfg, "nntp.$k", $url) // next; - $self->{nntp_opt}->{$sec}->{$k} = $to; - } - } - $nn_args; -} - sub nntp_fetch_all ($$$) { - my ($self, $nn, $url) = @_; - my $uri = uri_new($url); + my ($self, $nn, $uri) = @_; my ($group, $num_a, $num_b) = $uri->group; my $sec = uri_section($uri); my ($nr, $beg, $end) = $nn->group($group); @@ -689,7 +653,7 @@ sub nntp_fetch_all ($$$) { # IMAPTracker is also used for tracking NNTP, UID == article number # LIST.ACTIVE can get the equivalent of UIDVALIDITY, but that's # expensive. So we assume newsgroups don't change: - my $itrk = PublicInbox::IMAPTracker->new($url); + my $itrk = PublicInbox::IMAPTracker->new($$uri); my (undef, $l_art) = $itrk->get_last; $l_art //= $beg; # initial import @@ -702,14 +666,14 @@ sub nntp_fetch_all ($$$) { return if $l_art >= $end; # nothing to do $beg = $l_art + 1; - warn "I: $url fetching ARTICLE $beg..$end\n"; + warn "I: $uri fetching ARTICLE $beg..$end\n"; my $warn_cb = $SIG{__WARN__} || \&CORE::warn; my ($err, $art); local $SIG{__WARN__} = sub { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; - $warn_cb->("$pfx$url ", $art ? ("ARTICLE $art") : (), "\n", @_); + $warn_cb->("$pfx$uri ", $art ? ("ARTICLE $art") : (), "\n", @_); }; - my $inboxes = $self->{nntp}->{$url}; + my $inboxes = $self->{nntp}->{$$uri}; my $last_art; my $n = $self->{max_batch}; for ($beg..$end) { @@ -741,7 +705,7 @@ sub nntp_fetch_all ($$$) { } elsif ($inboxes eq 'watchspam') { my $eml = PublicInbox::Eml->new(\$raw); $self->{pi_cfg}->each_inbox(\&remove_eml_i, - $self, $eml, "$url ARTICLE $art"); + $self, $eml, "$uri ARTICLE $art"); } else { die "BUG: destination unknown $inboxes"; } @@ -754,23 +718,11 @@ sub nntp_fetch_all ($$$) { sub watch_nntp_init ($$) { my ($self, $poll) = @_; - eval { require Net::NNTP } or - die "Net::NNTP is required for NNTP:\n$@\n"; - eval { require PublicInbox::IMAPTracker } or - die "DBD::SQLite is required for NNTP\n:$@\n"; - - my $nn_args = nntp_common_init($self); # read args from config - - # make sure we can connect and cache the credentials in memory - $self->{nn_arg} = {}; # schema://authority => Net::NNTP->new args - for my $url (@{$self->{nntp_order}}) { - nn_for($self, $url, $nn_args); - } - for my $url (@{$self->{nntp_order}}) { - my $uri = uri_new($url); + nntp_common_init($self); # read args from config + for my $uri (@{$self->{nntp_order}}) { my $sec = uri_section($uri); my $intvl = $self->{nntp_opt}->{$sec}->{pollInterval}; - push @{$poll->{$intvl || 120}}, $url; + push @{$poll->{$intvl || 120}}, $uri; } } @@ -778,12 +730,12 @@ sub watch { # main entry point my ($self, $sig, $oldset) = @_; $self->{oldset} = $oldset; $self->{sig} = $sig; - my $poll = {}; # intvl_seconds => [ url1, url2 ] + my $poll = {}; # intvl_seconds => [ uri1, uri2 ] watch_imap_init($self, $poll) if $self->{imap}; watch_nntp_init($self, $poll) if $self->{nntp}; - while (my ($intvl, $urls) = each %$poll) { - # poll all URLs for a given interval sequentially - add_timer(0, \&poll_fetch_fork, $self, $intvl, $urls); + while (my ($intvl, $uris) = each %$poll) { + # poll all URIs for a given interval sequentially + add_timer(0, \&poll_fetch_fork, $self, $intvl, $uris); } watch_fs_init($self) if $self->{mdre}; PublicInbox::DS->SetPostLoopCallback(sub { !$self->quit_done }); diff --git a/t/lei-convert.t b/t/lei-convert.t index 29f8ba75..2ba62db3 100644 --- a/t/lei-convert.t +++ b/t/lei-convert.t @@ -6,26 +6,43 @@ use PublicInbox::MboxReader; use PublicInbox::MdirReader; use PublicInbox::NetReader; require_git 2.6; -require_mods(qw(DBD::SQLite Search::Xapian Mail::IMAPClient)); +require_mods(qw(DBD::SQLite Search::Xapian Mail::IMAPClient Net::NNTP)); my ($tmpdir, $for_destroy) = tmpdir; my $sock = tcp_server; -my $cmd = [ '-imapd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; +my $cmd = [ '-imapd', '-W0', "--stdout=$tmpdir/i1", "--stderr=$tmpdir/i2" ]; my ($ro_home, $cfg_path) = setup_public_inboxes; my $env = { PI_CONFIG => $cfg_path }; -my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); -my $host_port = tcp_host_port($sock); +my $tdi = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); +my $imap_host_port = tcp_host_port($sock); +$sock = tcp_server; +$cmd = [ '-nntpd', '-W0', "--stdout=$tmpdir/n1", "--stderr=$tmpdir/n2" ]; +my $tdn = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-nntpd: $?"); +my $nntp_host_port = tcp_host_port($sock); undef $sock; + test_lei({ tmpdir => $tmpdir }, sub { my $d = $ENV{HOME}; - my $dig = Digest::SHA->new(256); lei_ok('convert', '-o', "mboxrd:$d/foo.mboxrd", - "imap://$host_port/t.v2.0"); - ok(-f "$d/foo.mboxrd", 'mboxrd created'); + "imap://$imap_host_port/t.v2.0"); + ok(-f "$d/foo.mboxrd", 'mboxrd created from imap://'); + + lei_ok('convert', '-o', "mboxrd:$d/nntp.mboxrd", + "nntp://$nntp_host_port/t.v2"); + ok(-f "$d/nntp.mboxrd", 'mboxrd created from nntp://'); + my (@mboxrd, @mboxcl2); open my $fh, '<', "$d/foo.mboxrd" or BAIL_OUT $!; PublicInbox::MboxReader->mboxrd($fh, sub { push @mboxrd, shift }); ok(scalar(@mboxrd) > 1, 'got multiple messages'); + open $fh, '<', "$d/nntp.mboxrd" or BAIL_OUT $!; + my $i = 0; + PublicInbox::MboxReader->mboxrd($fh, sub { + my ($eml) = @_; + is($eml->body, $mboxrd[$i]->body, "body matches #$i"); + $i++; + }); + lei_ok('convert', '-o', "mboxcl2:$d/cl2", "mboxrd:$d/foo.mboxrd"); ok(-s "$d/cl2", 'mboxcl2 non-empty') or diag $lei_err; open $fh, '<', "$d/cl2" or BAIL_OUT $!; diff --git a/t/lei-import-nntp.t b/t/lei-import-nntp.t new file mode 100644 index 00000000..3fb78fbc --- /dev/null +++ b/t/lei-import-nntp.t @@ -0,0 +1,30 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +require_git 2.6; +require_mods(qw(json DBD::SQLite Search::Xapian Net::NNTP)); +my ($ro_home, $cfg_path) = setup_public_inboxes; +my ($tmpdir, $for_destroy) = tmpdir; +my $sock = tcp_server; +my $cmd = [ '-nntpd', '-W0', "--stdout=$tmpdir/1", "--stderr=$tmpdir/2" ]; +my $env = { PI_CONFIG => $cfg_path }; +my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-nntpd $?"); +my $host_port = tcp_host_port($sock); +undef $sock; +test_lei({ tmpdir => $tmpdir }, sub { + lei_ok(qw(q bytes:1..)); + my $out = json_utf8->decode($lei_out); + is_deeply($out, [ undef ], 'nothing imported, yet'); + lei_ok('import', "nntp://$host_port/t.v2"); + diag $lei_err; + lei_ok(qw(q bytes:1..)); + diag $lei_err; + $out = json_utf8->decode($lei_out); + ok(scalar(@$out) > 1, 'got imported messages'); + is(pop @$out, undef, 'trailing JSON null element was null'); + my %r; + for (@$out) { $r{ref($_)}++ } + is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); +}); +done_testing; diff --git a/t/watch_nntp.t b/t/watch_nntp.t deleted file mode 100644 index c0ad3098..00000000 --- a/t/watch_nntp.t +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2020-2021 all contributors -# License: AGPL-3.0+ -use strict; -use Test::More; -use PublicInbox::Config; -# see t/nntpd*.t for tests against a live NNTP server - -use_ok 'PublicInbox::Watch'; -my $nntp_url = \&PublicInbox::Watch::nntp_url; -is('news://example.com/inbox.foo', - $nntp_url->('NEWS://examplE.com/inbox.foo'), 'lowercased'); -is('nntps://example.com/inbox.foo', - $nntp_url->('nntps://example.com/inbox.foo'), 'nntps:// accepted'); -is('nntps://example.com/inbox.foo', - $nntp_url->('SNEWS://example.com/inbox.foo'), 'snews => nntps'); - -done_testing;