* [PATCH 1/3] mda: use InboxWritable
2018-07-29 9:34 [PATCH 0/3] mda: v2: ensure message bodies are indexed Eric Wong
@ 2018-07-29 9:34 ` Eric Wong
2018-07-29 9:34 ` [PATCH 2/3] t/v2mda: make it easy to test v1 repos here, too Eric Wong
2018-07-29 9:34 ` [PATCH 3/3] mda: v2: ensure message bodies are indexed Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2018-07-29 9:34 UTC (permalink / raw)
To: meta
It's a convenient wrapper nowadays, so get rid of some legacy
code and minimize differences from the -watch code.
---
lib/PublicInbox/InboxWritable.pm | 1 +
script/public-inbox-mda | 37 +++++++++-----------------------
2 files changed, 11 insertions(+), 27 deletions(-)
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 9b0cdfd..aa62132 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -39,6 +39,7 @@ sub importer {
my $addr = $self->{-primary_address};
PublicInbox::Import->new($git, $name, $addr, $self);
} else {
+ $! = 78; # EX_CONFIG 5.3.5 local configuration error
die "unsupported inbox version: $v\n";
}
}
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 1f1252a..2a31537 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -18,11 +18,10 @@ use Email::Simple;
use PublicInbox::MIME;
use PublicInbox::MDA;
use PublicInbox::Config;
-use PublicInbox::Import;
-use PublicInbox::Git;
use PublicInbox::Emergency;
use PublicInbox::Filter::Base;
use PublicInbox::Spamcheck::Spamc;
+use PublicInbox::InboxWritable;
# n.b: hopefully we can setup the emergency path without bailing due to
# user error, we really want to setup the emergency destination ASAP
@@ -39,7 +38,8 @@ my $recipient = $ENV{ORIGINAL_RECIPIENT};
defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n";
my $dst = $config->lookup($recipient); # first check
defined $dst or do_exit(67); # EX_NOUSER 5.1.1 user unknown
-my $main_repo = $dst->{mainrepo} or do_exit(67);
+$dst->{mainrepo} or do_exit(67);
+$dst = PublicInbox::InboxWritable->new($dst);
# pre-check, MDA has stricter rules than an importer might;
do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address});
@@ -55,18 +55,13 @@ $str = '';
do_exit(0) unless $spam_ok;
my $fcfg = $dst->{filter} || '';
-my $filter;
-if ($fcfg =~ /::/) {
- eval "require $fcfg";
- die $@ if $@;
- $filter = $fcfg->new;
-} elsif ($fcfg eq 'scrub') { # TODO:
- require PublicInbox::Filter::Mirror;
- $filter = PublicInbox::Filter::Mirror->new;
-} else {
- $filter = PublicInbox::Filter::Base->new;
+# -mda defaults to the strict base filter
+if ($fcfg eq '') {
+ $dst->{filter} = 'PublicInbox::Filter::Base';
+} elsif ($fcfg eq 'scrub') { # legacy alias, undocumented, remove?
+ $dst->{filter} = 'PublicInbox::Filter::Mirror';
}
-
+my $filter = $dst->filter;
my $ret = $filter->delivery($mime);
if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
$mime = $ret;
@@ -78,19 +73,7 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
} # else { accept
PublicInbox::MDA->set_list_headers($mime, $dst);
-my $v = $dst->{version} || 1;
-my $im;
-if ($v == 2) {
- require PublicInbox::V2Writable;
- $im = PublicInbox::V2Writable->new($dst);
- $im->{parallel} = 0; # pointless to be parallel for a single message
-} elsif ($v == 1) {
- my $git = $dst->git;
- $im = PublicInbox::Import->new($git, $dst->{name}, $recipient, $dst);
-} else {
- $! = 78; # EX_CONFIG 5.3.5 local configuration error
- die "Unsupported inbox version: $v\n";
-}
+my $im = $dst->importer(0);
if (defined $im->add($mime)) {
$emm = $emm->abort;
} else {
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] t/v2mda: make it easy to test v1 repos here, too
2018-07-29 9:34 [PATCH 0/3] mda: v2: ensure message bodies are indexed Eric Wong
2018-07-29 9:34 ` [PATCH 1/3] mda: use InboxWritable Eric Wong
@ 2018-07-29 9:34 ` Eric Wong
2018-07-29 9:34 ` [PATCH 3/3] mda: v2: ensure message bodies are indexed Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2018-07-29 9:34 UTC (permalink / raw)
To: meta
It will help track down a bug which only seems to happen in v2 repos.
---
t/v2mda.t | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/t/v2mda.t b/t/v2mda.t
index f386289..7df3a43 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -8,6 +8,7 @@ use File::Temp qw/tempdir/;
use Fcntl qw(SEEK_SET);
use Cwd;
+my $V = 2;
foreach my $mod (qw(DBD::SQLite Search::Xapian)) {
eval "require $mod";
plan skip_all => "$mod missing for v2mda.t" if $@;
@@ -38,7 +39,8 @@ local $ENV{PI_DIR} = "$tmpdir/foo";
local $ENV{PATH} = "$main_bin:blib/script:$ENV{PATH}";
local $ENV{PI_EMERGENCY} = "$tmpdir/fail";
ok(mkdir "$tmpdir/fail");
-my @cmd = (qw(public-inbox-init -V2), $ibx->{name},
+
+my @cmd = (qw(public-inbox-init), "-V$V", $ibx->{name},
$ibx->{mainrepo}, 'http://localhost/test',
$ibx->{address}->[0]);
ok(PublicInbox::Import::run_die(\@cmd), 'initialized v2 inbox');
@@ -54,6 +56,11 @@ ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr),
'mda delivered a message');
$ibx = PublicInbox::Inbox->new($ibx);
+
+if ($V == 1) {
+ my $cmd = [ 'public-inbox-index', "$tmpdir/inbox" ];
+ ok(PublicInbox::Import::run_die($cmd, undef, $rdr), 'v1 indexed');
+}
my $msgs = $ibx->search->query('');
my $saved = $ibx->smsg_mime($msgs->[0]);
is($saved->{mime}->as_string, $mime->as_string, 'injected message');
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] mda: v2: ensure message bodies are indexed
2018-07-29 9:34 [PATCH 0/3] mda: v2: ensure message bodies are indexed Eric Wong
2018-07-29 9:34 ` [PATCH 1/3] mda: use InboxWritable Eric Wong
2018-07-29 9:34 ` [PATCH 2/3] t/v2mda: make it easy to test v1 repos here, too Eric Wong
@ 2018-07-29 9:34 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2018-07-29 9:34 UTC (permalink / raw)
To: meta
We must not clobber the original message string, as Email::MIME(*)
still needs it for iterating through parts in SearchIdx (but not
when handing it as a raw string to git-fast-import).
I've noticed message bodies (especially dfpre/dpost) were not
getting indexed when going through -mda (no problems with
-watch). This also did not affect v1 repos, since indexing is a
separate process for v1 and requires re-reading the data from
git.
(*) tested Email::MIME 1.937 on Debian stretch
---
MANIFEST | 1 +
script/public-inbox-mda | 1 -
t/data/0001.patch | 46 +++++++++++++++++++++++++++++++++++++++++
t/v2mda.t | 10 +++++++++
t/watch_maildir_v2.t | 15 ++++++++++++++
5 files changed, 72 insertions(+), 1 deletion(-)
create mode 100644 t/data/0001.patch
diff --git a/MANIFEST b/MANIFEST
index fd74a43..003c3c5 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -146,6 +146,7 @@ t/config.t
t/config_limiter.t
t/content_id.t
t/convert-compact.t
+t/data/0001.patch
t/emergency.t
t/fail-bin/spamc
t/feed.t
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 2a31537..2b7f298 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -51,7 +51,6 @@ $emm = PublicInbox::Emergency->new($emergency);
$emm->prepare(\$str);
$ems = $ems->abort;
my $mime = PublicInbox::MIME->new(\$str);
-$str = '';
do_exit(0) unless $spam_ok;
my $fcfg = $dst->{filter} || '';
diff --git a/t/data/0001.patch b/t/data/0001.patch
new file mode 100644
index 0000000..b7964a2
--- /dev/null
+++ b/t/data/0001.patch
@@ -0,0 +1,46 @@
+From: Eric Wong <e@80x24.org>
+Date: Fri, 20 Jul 2018 07:21:41 +0000
+To: test@example.com
+Subject: [PATCH] search: use boolean prefix for filenames in diffs, too
+Message-ID: <20180720072141.GA15957@example>
+
+Filenames within a project tend to be reasonably stable within a
+project and I plan on having automated searches hit these.
+
+Also, using no term prefix at all (the default for searching)
+still allows probabilistic searches on everything that's in a
+"git diff", including the blob names which were just made
+boolean.
+
+Note, attachment filenames ("n:" prefix) will stil use
+probabilistic search, as they're hardly standardized.
+---
+ lib/PublicInbox/Search.pm | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
+index 090d998b6c2c..6e006fd73b1d 100644
+--- a/lib/PublicInbox/Search.pm
++++ b/lib/PublicInbox/Search.pm
+@@ -53,6 +53,9 @@ my %bool_pfx_external = (
+ dfpre => 'XDFPRE',
+ dfpost => 'XDFPOST',
+ dfblob => 'XDFPRE XDFPOST',
++ dfn => 'XDFN',
++ dfa => 'XDFA',
++ dfb => 'XDFB',
+ );
+
+ my $non_quoted_body = 'XNQ XDFN XDFA XDFB XDFHH XDFCTX XDFPRE XDFPOST';
+@@ -72,9 +75,6 @@ my %prob_prefix = (
+
+ q => 'XQUOT',
+ nq => $non_quoted_body,
+- dfn => 'XDFN',
+- dfa => 'XDFA',
+- dfb => 'XDFB',
+ dfhh => 'XDFHH',
+ dfctx => 'XDFCTX',
+
+--
+^_^
diff --git a/t/v2mda.t b/t/v2mda.t
index 7df3a43..6145720 100644
--- a/t/v2mda.t
+++ b/t/v2mda.t
@@ -65,4 +65,14 @@ my $msgs = $ibx->search->query('');
my $saved = $ibx->smsg_mime($msgs->[0]);
is($saved->{mime}->as_string, $mime->as_string, 'injected message');
+my $patch = 't/data/0001.patch';
+open my $fh, '<', $patch or die "failed to open $patch: $!\n";
+$rdr = { 0 => fileno($fh) };
+ok(PublicInbox::Import::run_die(['public-inbox-mda'], undef, $rdr),
+ 'mda delivered a patch');
+my $post = $ibx->search->reopen->query('dfpost:6e006fd7');
+is(scalar(@$post), 1, 'got one result for dfpost');
+my $pre = $ibx->search->query('dfpre:090d998');
+is(scalar(@$pre), 1, 'got one result for dfpre');
+is($post->[0]->{blob}, $pre->[0]->{blob}, 'same message in both cases');
done_testing();
diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t
index a76e413..fc002dc 100644
--- a/t/watch_maildir_v2.t
+++ b/t/watch_maildir_v2.t
@@ -120,6 +120,21 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
is($nr, 1, 'inbox has one mail after spamc OK-ed a message');
my $mref = $ibx->msg_by_smsg($msgs->[0]);
like($$mref, qr/something\n\z/s, 'message scrubbed on import');
+ delete $config->{'publicinboxwatch.spamcheck'};
+}
+
+{
+ my $patch = 't/data/0001.patch';
+ open my $fh, '<', $patch or die "failed to open $patch: $!\n";
+ $msg = eval { local $/; <$fh> };
+ PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ PublicInbox::WatchMaildir->new($config)->scan('full');
+ ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7');
+ is($nr, 1, 'diff postimage found');
+ my $post = $msgs->[0];
+ ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c');
+ is($nr, 1, 'diff preimage found');
+ is($post->{blob}, $msgs->[0]->{blob}, 'same message');
}
done_testing;
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread