* [PATCH 1/4] dead code cleanup
@ 2015-08-20 10:20 Eric Wong
2015-08-20 10:20 ` [PATCH 2/4] view: simplify message threading dumpers Eric Wong
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2015-08-20 10:20 UTC (permalink / raw)
To: meta
We may not be using subject_path after all.
---
lib/PublicInbox/Thread.pm | 7 -------
lib/PublicInbox/View.pm | 46 +++++++++++++---------------------------------
lib/PublicInbox/WWW.pm | 18 ------------------
3 files changed, 13 insertions(+), 58 deletions(-)
diff --git a/lib/PublicInbox/Thread.pm b/lib/PublicInbox/Thread.pm
index 92404fa..a3dedf5 100644
--- a/lib/PublicInbox/Thread.pm
+++ b/lib/PublicInbox/Thread.pm
@@ -24,13 +24,6 @@ sub sort_ts {
} @_;
}
-sub rsort_ts {
- sort {
- (eval { $b->topmost->message->header('X-PI-TS') } || 0) <=>
- (eval { $a->topmost->message->header('X-PI-TS') } || 0)
- } @_;
-}
-
package PublicInbox::Thread::Container;
use strict;
use warnings;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 17af21f..e29922e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -153,10 +153,7 @@ sub thread_html {
my $msgs = load_results($res);
my $nr = scalar @$msgs;
return $rv if $nr == 0;
- require PublicInbox::Thread;
- my $th = PublicInbox::Thread->new(@$msgs);
- $th->thread;
- $th->order(*PublicInbox::Thread::sort_ts);
+ my $th = thread_results($msgs);
my $state = [ $srch, { root_anchor => anchor_for($mid) }, undef, 0 ];
{
require PublicInbox::GitCatFile;
@@ -176,30 +173,6 @@ sub thread_html {
$rv .= "<hr />" . PRE_WRAP . $next . $foot . "</pre>";
}
-sub subject_path_html {
- my (undef, $ctx, $foot, $srch) = @_;
- my $path = $ctx->{subject_path};
- my $res = $srch->get_subject_path($path);
- my $rv = '';
- my $msgs = load_results($res);
- my $nr = scalar @$msgs;
- return $rv if $nr == 0;
- require PublicInbox::Thread;
- my $th = PublicInbox::Thread->new(@$msgs);
- $th->thread;
- $th->order(*PublicInbox::Thread::sort_ts);
- my $state = [ $srch, { root_anchor => 'dummy' }, undef, 0 ];
- {
- require PublicInbox::GitCatFile;
- my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
- thread_entry(\$rv, $git, $state, $_, 0) for $th->rootset;
- }
- my $final_anchor = $state->[3];
- my $next = "<a\nid=\"s$final_anchor\">end of thread</a>\n";
-
- $rv .= "<hr />" . PRE_WRAP . $next . $foot . "</pre>";
-}
-
# only private functions below.
sub index_walk {
@@ -560,12 +533,10 @@ sub simple_dump {
sub thread_followups {
my ($dst, $root, $res) = @_;
- my $msgs = load_results($res);
- require PublicInbox::Thread;
$root->header_set('X-PI-TS', '0');
- my $th = PublicInbox::Thread->new($root, @$msgs);
- $th->thread;
- $th->order(*PublicInbox::Thread::sort_ts);
+ my $msgs = load_results($res);
+ push @$msgs, $root;
+ my $th = thread_results($msgs);
my $srch = $res->{srch};
my $subj = $srch->subject_path($root->header('Subject'));
my %seen = ($subj => 1);
@@ -618,4 +589,13 @@ sub msg_timestamp {
defined($ts) ? $ts : 0;
}
+sub thread_results {
+ my ($msgs) = @_;
+ require PublicInbox::Thread;
+ my $th = PublicInbox::Thread->new(@$msgs);
+ $th->thread;
+ $th->order(*PublicInbox::Thread::sort_ts);
+ $th
+}
+
1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e7c28d7..b0c1348 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -53,11 +53,6 @@ sub run {
} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi);
- # subject_path display
- } elsif ($path_info =~ m!$LISTNAME_RE/s/(\S+)\.html\z!o) {
- my $sp = $2;
- invalid_list(\%ctx, $1) || get_subject_path(\%ctx, $cgi, $sp);
-
} elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) {
invalid_list_mid(\%ctx, $1, $2) ||
redirect_mid_txt(\%ctx, $cgi);
@@ -202,19 +197,6 @@ sub get_thread {
[ $body ] ];
}
-# /$LISTNAME/s/$SUBJECT_PATH.html
-sub get_subject_path {
- my ($ctx, $cgi, $sp) = @_;
- $ctx->{subject_path} = $sp;
- my $srch = searcher($ctx) or return need_search($ctx);
- require PublicInbox::View;
- my $foot = footer($ctx);
- my $body = PublicInbox::View->subject_path_html($ctx, $foot, $srch) or
- return r404();
- [ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ $body ] ];
-}
-
sub self_url {
my ($cgi) = @_;
ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/4] view: simplify message threading dumpers
2015-08-20 10:20 [PATCH 1/4] dead code cleanup Eric Wong
@ 2015-08-20 10:20 ` Eric Wong
2015-08-20 10:20 ` [PATCH 3/4] avoid using header_raw for Message-ID retrieval Eric Wong
2015-08-20 10:20 ` [PATCH 4/4] search: preserve References: order in document data Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-08-20 10:20 UTC (permalink / raw)
To: meta
---
lib/PublicInbox/View.pm | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e29922e..fe701b1 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -498,6 +498,7 @@ sub anchor_for {
sub simple_dump {
my ($dst, $root, $node, $level) = @_;
+ return unless $node;
# $root = [ Root Message-ID, \%seen, $srch ];
if (my $x = $node->message) {
my $mid = $x->header('Message-ID');
@@ -527,8 +528,8 @@ sub simple_dump {
}
}
}
- simple_dump($dst, $root, $node->child, $level + 1) if $node->child;
- simple_dump($dst, $root, $node->next, $level) if $node->next;
+ simple_dump($dst, $root, $node->child, $level+1);
+ simple_dump($dst, $root, $node->next, $level);
}
sub thread_followups {
@@ -553,6 +554,7 @@ sub thread_html_head {
sub thread_entry {
my ($dst, $git, $state, $node, $level) = @_;
+ return unless $node;
# $state = [ $search_res, $seen, undef, 0 (msg_nr) ];
# $seen is overloaded with 3 types of fields:
# 1) "root_anchor" => anchor_for(Message-ID),
@@ -570,11 +572,8 @@ sub thread_entry {
$$dst .= index_entry(undef, $mime, $level, $state);
}
}
- my $cur;
- $cur = $node->child and
- thread_entry($dst, $git, $state, $cur, $level + 1);
- $cur = $node->next and
- thread_entry($dst, $git, $state, $cur, $level);
+ thread_entry($dst, $git, $state, $node->child, $level + 1);
+ thread_entry($dst, $git, $state, $node->next, $level);
}
sub load_results {
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/4] avoid using header_raw for Message-ID retrieval
2015-08-20 10:20 [PATCH 1/4] dead code cleanup Eric Wong
2015-08-20 10:20 ` [PATCH 2/4] view: simplify message threading dumpers Eric Wong
@ 2015-08-20 10:20 ` Eric Wong
2015-08-20 10:20 ` [PATCH 4/4] search: preserve References: order in document data Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-08-20 10:20 UTC (permalink / raw)
To: meta
This is for consistency with ssoma. I doubt it makes
a difference in practice, but in case somebody decides
any of the Message-ID-containing headers should have
strange characters, we'll decode and attempt to thread
them. This isn't an attack vector, just a way to
make messages thread improperly which is pointless...
---
lib/PublicInbox/Feed.pm | 4 ++--
lib/PublicInbox/Search.pm | 8 ++++----
lib/PublicInbox/View.pm | 14 +++++++-------
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 2e352cb..bbf5061 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -254,7 +254,7 @@ sub add_to_feed {
my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
my $header_obj = $mime->header_obj;
- my $mid = $header_obj->header_raw('Message-ID');
+ my $mid = $header_obj->header('Message-ID');
defined $mid or return 0;
$mid = PublicInbox::Hval->new_msgid($mid);
my $href = $mid->as_href . '.html';
@@ -318,7 +318,7 @@ sub add_topic {
my $mime = do_cat_mail($git, $path) or return 0;
$header_obj = $mime->header_obj;
}
- my $mid = $header_obj->header_raw('Message-ID');
+ my $mid = $header_obj->header('Message-ID');
$mid = mid_compressed(mid_clean($mid));
$u = $enc_utf8->decode($u);
push @$order, [ $mid, $ts, $u, $subj ];
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index d9e5fd1..2c66e55 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -84,7 +84,7 @@ sub add_message {
my $db = $self->{xdb};
my $doc_id;
- my $mid_orig = mid_clean($mime->header_obj->header_raw('Message-ID'));
+ my $mid_orig = mid_clean($mime->header_obj->header('Message-ID'));
my $mid = mid_compressed($mid_orig);
my $was_ghost = 0;
my $ct_msg = $mime->header('Content-Type') || 'text/plain';
@@ -348,9 +348,9 @@ sub link_message_to_parents {
my $doc = $smsg->{doc};
my $mid = mid_compressed($smsg->mid);
my $mime = $smsg->mime;
- my $refs = $mime->header_obj->header_raw('References');
+ my $refs = $mime->header_obj->header('References');
my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
- my $irt = $mime->header_obj->header_raw('In-Reply-To');
+ my $irt = $mime->header_obj->header('In-Reply-To');
if ($irt) {
if ($irt =~ /<([^>]+)>/) {
$irt = $1;
@@ -529,7 +529,7 @@ sub index_blob {
sub unindex_blob {
my ($self, $git, $blob) = @_;
my $mime = do_cat_mail($git, $blob) or return;
- my $mid = $mime->header_obj->header_raw('Message-ID');
+ my $mid = $mime->header_obj->header('Message-ID');
eval { $self->remove_message($mid) } if defined $mid;
warn "W: unindex_blob $blob: $@\n" if $@;
}
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index fe701b1..fb000f2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -57,7 +57,7 @@ sub index_entry {
my $subj = $mime->header('Subject');
my $header_obj = $mime->header_obj;
- my $mid_raw = $header_obj->header_raw('Message-ID');
+ my $mid_raw = $header_obj->header('Message-ID');
my $id = anchor_for($mid_raw);
$seen->{$id} = "#$id"; # save the anchor for later
@@ -73,7 +73,7 @@ sub index_entry {
my $more = 'permalink';
my $path = $root_anchor ? '../' : '';
my $href = $mid->as_href;
- my $irt = $header_obj->header_raw('In-Reply-To');
+ my $irt = $header_obj->header('In-Reply-To');
my ($anchor_idx, $anchor, $t_anchor);
if (defined $irt) {
$anchor_idx = anchor_for($irt);
@@ -361,7 +361,7 @@ sub headers_to_html_header {
my $rv = "";
my @title;
my $header_obj = $mime->header_obj;
- my $mid = $header_obj->header_raw('Message-ID');
+ my $mid = $header_obj->header('Message-ID');
$mid = PublicInbox::Hval->new_msgid($mid);
my $mid_href = $mid->as_href;
foreach my $h (qw(From To Cc Subject Date)) {
@@ -388,7 +388,7 @@ sub headers_to_html_header {
$mid_href = "../m/$mid_href" unless $full_pfx;
$rv .= "(<a\nhref=\"$mid_href.txt\">raw</a>)\n";
- my $irt = $header_obj->header_raw('In-Reply-To');
+ my $irt = $header_obj->header('In-Reply-To');
if (defined $irt) {
my $v = PublicInbox::Hval->new_msgid($irt);
my $html = $v->as_html;
@@ -397,7 +397,7 @@ sub headers_to_html_header {
$rv .= "<a\nhref=\"$href.html\">$html</a>>\n";
}
- my $refs = $header_obj->header_raw('References');
+ my $refs = $header_obj->header('References');
if ($refs) {
# avoid redundant URLs wasting bandwidth
my %seen;
@@ -441,7 +441,7 @@ sub html_footer {
my $subj = $mime->header('Subject') || '';
$subj = "Re: $subj" unless $subj =~ /\bRe:/;
- my $mid = $mime->header_obj->header_raw('Message-ID');
+ my $mid = $mime->header_obj->header('Message-ID');
my $irt = uri_escape_utf8($mid);
delete $cc{$to};
$to = uri_escape_utf8($to);
@@ -452,7 +452,7 @@ sub html_footer {
my $idx = $standalone ? " <a\nhref=\"../\">index</a>" : '';
if ($idx && $srch) {
- $irt = $mime->header_obj->header_raw('In-Reply-To') || '';
+ $irt = $mime->header_obj->header('In-Reply-To') || '';
$mid = mid_compressed(mid_clean($mid));
my $t_anchor = length $irt ? T_ANCHOR : '';
$idx = " <a\nhref=\"../t/$mid.html$t_anchor\">".
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 4/4] search: preserve References: order in document data
2015-08-20 10:20 [PATCH 1/4] dead code cleanup Eric Wong
2015-08-20 10:20 ` [PATCH 2/4] view: simplify message threading dumpers Eric Wong
2015-08-20 10:20 ` [PATCH 3/4] avoid using header_raw for Message-ID retrieval Eric Wong
@ 2015-08-20 10:20 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2015-08-20 10:20 UTC (permalink / raw)
To: meta
We need proper ordering of References to thread messages
correctly. We would lose this order if we load the terms
from the database, so set it directly in the document data.
Do not bother with a separate In-Reply-To, since Mail::Thread
just merges the IRT into References. This bumps our schema
version once again.
---
lib/PublicInbox/Search.pm | 23 +++++++++++------------
lib/PublicInbox/SearchMsg.pm | 39 +++++++++++++++++++--------------------
2 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 2c66e55..f004050 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -23,7 +23,8 @@ use constant {
# 3 - message-ID is compressed if it includes '%' (hack!)
# 4 - change "Re: " normalization, avoid circular Reference ghosts
# 5 - subject_path drops trailing '.'
- SCHEMA_VERSION => 5,
+ # 6 - preserve References: order in document data
+ SCHEMA_VERSION => 6,
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
@@ -49,9 +50,9 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
sub xpfx { $all_pfx{$_[0]} }
our %PFX2TERM_RMAP;
+my %meta_pfx = (mid => 1, thread => 1, path => 1, type => 1);
while (my ($k, $v) = each %all_pfx) {
- next if $prob_prefix{$k};
- $PFX2TERM_RMAP{$v} = $k;
+ $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
}
my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
@@ -129,8 +130,6 @@ sub add_message {
my $ts = Search::Xapian::sortable_serialise($smsg->ts);
$doc->add_value(PublicInbox::Search::TS, $ts);
- $doc->set_data($smsg->to_doc_data);
-
my $tg = $self->term_generator;
$tg->set_document($doc);
@@ -176,9 +175,11 @@ sub add_message {
if ($was_ghost) {
$doc_id = $smsg->doc_id;
$self->link_message($smsg, 0);
+ $doc->set_data($smsg->to_doc_data);
$db->replace_document($doc_id, $doc);
} else {
$self->link_message($smsg, 0);
+ $doc->set_data($smsg->to_doc_data);
$doc_id = $db->add_document($doc);
}
};
@@ -352,14 +353,14 @@ sub link_message_to_parents {
my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
my $irt = $mime->header_obj->header('In-Reply-To');
if ($irt) {
- if ($irt =~ /<([^>]+)>/) {
- $irt = $1;
- }
+ $irt = mid_compressed(mid_clean($irt));
# maybe some crazies will try to make a circular reference:
if ($irt eq $mid) {
$irt = undef;
} else {
+ # last References should be $irt
+ # we will de-dupe later
push @refs, $irt;
}
}
@@ -376,12 +377,10 @@ sub link_message_to_parents {
$uniq{$ref} = 1;
push @refs, $ref;
}
- $irt = undef if (defined $irt && !$uniq{$irt});
}
if (@refs) {
- if (defined $irt) {
- $doc->add_term(xpfx('inreplyto') . $irt);
- }
+ $doc->add_term(xpfx('inreplyto') . $irt) if defined $irt;
+ $smsg->{references_sorted} = '<'.join('><', @refs).'>';
my $ref_pfx = xpfx('references');
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 14a62eb..03df7ab 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -31,13 +31,14 @@ sub load_doc {
my ($class, $doc) = @_;
my $data = $doc->get_data;
$data = $enc_utf8->decode($data);
- my ($mid, $subj, $from, $date) = split(/\n/, $data);
+ my ($mid, $subj, $from, $date, $refs) = split(/\n/, $data);
bless {
doc => $doc,
mid => $mid,
subject => $subj,
date => $date,
from_name => $from,
+ references_sorted => $refs,
}, $class;
}
@@ -78,17 +79,16 @@ sub ts {
my ($self) = @_;
my $ts = $self->{ts};
return $ts if $ts;
- $self->{date} = undef;
- $self->date;
- $self->{ts};
+ $self->{ts} = eval {
+ str2time($self->date || $self->mime->header('Date'))
+ } || 0;
}
sub date {
my ($self) = @_;
my $date = $self->{date};
return $date if $date;
- my $ts = eval { str2time($self->mime->header('Date')) } || 0;
- $self->{ts} = $ts;
+ my $ts = eval { str2time($self->mime->header('Date')) };
$self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
}
@@ -98,7 +98,14 @@ sub to_doc_data {
$self->mid . "\n" .
$self->subject . "\n" .
$self->from_name . "\n".
- $self->date;
+ $self->date . "\n" .
+ $self->references_sorted;
+}
+
+sub references_sorted {
+ my ($self) = @_;
+ my $x = $self->{references_sorted};
+ defined $x ? $x : '';
}
sub ensure_metadata {
@@ -117,12 +124,7 @@ sub ensure_metadata {
if ($val =~ s/$PFX2TERM_RE//o) {
my $field = $PublicInbox::Search::PFX2TERM_RMAP{$1};
- if ($field eq 'references') {
- my $refs = $self->{references} ||= [];
- push @$refs, $val;
- } else {
- $self->{$field} = $val;
- }
+ $self->{$field} = $val;
}
}
}
@@ -138,14 +140,11 @@ sub mini_mime {
'X-PI-TS' => $self->ts,
'Message-ID' => "<$self->{mid}>",
);
- if (my $refs = $self->{references}) {
- push @h, References => '<' . join('> <', @$refs) . '>';
- }
- if (my $irt = $self->{inreplyto}) {
- push @h, 'In-Reply-To' => "<$irt>";
- }
- Email::MIME->create(header_str => \@h);
+ my $refs = $self->{references_sorted};
+ my $mime = Email::MIME->create(header_str => \@h);
+ $mime->header_set('References', $refs) if (defined $refs);
+ $mime;
}
sub mid {
--
EW
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-08-20 10:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-20 10:20 [PATCH 1/4] dead code cleanup Eric Wong
2015-08-20 10:20 ` [PATCH 2/4] view: simplify message threading dumpers Eric Wong
2015-08-20 10:20 ` [PATCH 3/4] avoid using header_raw for Message-ID retrieval Eric Wong
2015-08-20 10:20 ` [PATCH 4/4] search: preserve References: order in document data Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).