unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/9] search pushed and deployed to master
@ 2015-08-17  3:38 Eric Wong
  2015-08-17  3:38 ` [PATCH 1/9] feed: remove unnecessary time parameter in index state Eric Wong
                   ` (8 more replies)
  0 siblings, 9 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

This should appear shortly in: http://public-inbox.org/meta/

Eric Wong (9):
      feed: remove unnecessary time parameter in index state
      favor /t/ to /s/, since subjects may change mid-thread
      WWW: eliminate "top" parameter for feeds
      www: simplify parameter passing to feed
      terminology: replies => followups
      search: use raw headers without MIME decoding
      feed: disable the generator statement
      drop bodies and messages ASAP after processing
      search: apply mid_compression to subject paths, too

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/9] feed: remove unnecessary time parameter in index state
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 2/9] favor /t/ to /s/, since subjects may change mid-thread Eric Wong
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

We no longer do "smart" time displays as of
commit ea0e8649f90d1fd0850a41c0ca16642faadf4f14
("view: simplify timestamp generation").

In retrospect, that commit also made us more cache-friendly.
---
 lib/PublicInbox/Feed.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index b532559..1a89fba 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -76,7 +76,7 @@ sub generate_html_index {
 	$th->order(*PublicInbox::Thread::sort_ts);
 
 	# except we sort top-level messages reverse chronologically
-	my $state = [ time, {}, $first, 0 ];
+	my $state = [ undef, {}, $first, 0 ];
 	for (PublicInbox::Thread::rsort_ts($th->rootset)) {
 		dump_msg($_, 0, \$html, $state)
 	}
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/9] favor /t/ to /s/, since subjects may change mid-thread
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
  2015-08-17  3:38 ` [PATCH 1/9] feed: remove unnecessary time parameter in index state Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 3/9] WWW: eliminate "top" parameter for feeds Eric Wong
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

/t/ always falls back to subject path searching anyway,
so little is lost besides perhaps more readable URLs.
Unfortunately people still use non-compliant mail clients which fail
to set In-Reply-To or References headers :<
---
 lib/PublicInbox/Feed.pm |  2 +-
 lib/PublicInbox/View.pm | 53 +++++++++++++++++++++++++++----------------------
 lib/PublicInbox/WWW.pm  |  2 ++
 3 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 1a89fba..5a41bea 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -76,7 +76,7 @@ sub generate_html_index {
 	$th->order(*PublicInbox::Thread::sort_ts);
 
 	# except we sort top-level messages reverse chronologically
-	my $state = [ undef, {}, $first, 0 ];
+	my $state = [ $args->{srch}, {}, $first, 0 ];
 	for (PublicInbox::Thread::rsort_ts($th->rootset)) {
 		dump_msg($_, 0, \$html, $state)
 	}
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4708243..4960935 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -48,7 +48,7 @@ sub feed_entry {
 # state = [ time, seen = {}, first_commit, page_nr = 0 ]
 sub index_entry {
 	my (undef, $mime, $level, $state) = @_;
-	my (undef, $seen, $first_commit) = @$state;
+	my ($srch, $seen, $first_commit) = @$state;
 	my $midx = $state->[3]++;
 	my ($prev, $next) = ($midx - 1, $midx + 1);
 	my $rv = '';
@@ -75,10 +75,26 @@ sub index_entry {
 	my $more = 'permalink';
 	if ($root_anchor) {
 		$path = '../';
-		$subj = "<u\nid=\"u\">$subj</u>" if $root_anchor eq $id;
 	} else {
 		$path = '';
 	}
+	my $href = $mid->as_href;
+	my $irt = $header_obj->header_raw('In-Reply-To');
+	my ($anchor_idx, $anchor, $t_anchor);
+	if (defined $irt) {
+		$anchor_idx = anchor_for($irt);
+		$anchor = $seen->{$anchor_idx};
+		$t_anchor = T_ANCHOR;
+	} else {
+		$t_anchor = '';
+	}
+
+	if (defined $srch) {
+		$subj = "<a\nhref=\"${path}t/$href.html#u\">$subj</a>";
+	}
+	if ($root_anchor && $root_anchor eq $id) {
+		$subj = "<u\nid=\"u\">$subj</u>";
+	}
 
 	my $ts = $mime->header('X-PI-TS');
 	my $fmt = '%Y-%m-%d %H:%M UTC';
@@ -92,16 +108,6 @@ sub index_entry {
 	}
 	$rv .= "\n\n";
 
-	my $irt = $header_obj->header_raw('In-Reply-To');
-	my ($anchor_idx, $anchor, $t_anchor);
-	if (defined $irt) {
-		$anchor_idx = anchor_for($irt);
-		$anchor = $seen->{$anchor_idx};
-		$t_anchor = T_ANCHOR;
-	} else {
-		$t_anchor = '';
-	}
-	my $href = $mid->as_href;
 	my $mhref = "${path}m/$href.html";
 	my $fhref = "${path}f/$href.html";
 	# scan through all parts, looking for displayable text
@@ -126,8 +132,8 @@ sub index_entry {
 		$rv .= " <a\nhref=\"$anchor\">parent</a>";
 	}
 
-	if ($first_commit) {
-		$rv .= " <a\nhref=\"t/$href.html$t_anchor\">thread</a>";
+	if ($srch) {
+		$rv .= " <a\nhref=\"${path}t/$href.html$t_anchor\">thread</a>";
 	}
 
 	$rv . "\n\n";
@@ -145,7 +151,7 @@ sub thread_html {
 	my $th = PublicInbox::Thread->new(@$msgs);
 	$th->thread;
 	$th->order(*PublicInbox::Thread::sort_ts);
-	my $state = [ undef, { root_anchor => anchor_for($mid) }, undef, 0 ];
+	my $state = [ $srch, { root_anchor => anchor_for($mid) }, undef, 0 ];
 	thread_entry(\$rv, $state, $_, 0) for $th->rootset;
 	my $final_anchor = $state->[3];
 	my $next = "<a\nid=\"s$final_anchor\">end of thread</a>\n";
@@ -165,7 +171,7 @@ sub subject_path_html {
 	my $th = PublicInbox::Thread->new(@$msgs);
 	$th->thread;
 	$th->order(*PublicInbox::Thread::sort_ts);
-	my $state = [ undef, { root_anchor => 'dummy' }, undef, 0 ];
+	my $state = [ $srch, { root_anchor => 'dummy' }, undef, 0 ];
 	thread_entry(\$rv, $state, $_, 0) for $th->rootset;
 	my $final_anchor = $state->[3];
 	my $next = "<a\nid=\"s$final_anchor\">end of thread</a>\n";
@@ -340,6 +346,10 @@ sub headers_to_html_header {
 
 	my $rv = "";
 	my @title;
+	my $header_obj = $mime->header_obj;
+	my $mid = $header_obj->header_raw('Message-ID');
+	$mid = PublicInbox::Hval->new_msgid($mid);
+	my $mid_href = $mid->as_href;
 	foreach my $h (qw(From To Cc Subject Date)) {
 		my $v = $mime->header($h);
 		defined($v) && length($v) or next;
@@ -351,8 +361,7 @@ sub headers_to_html_header {
 		} elsif ($h eq 'Subject') {
 			$title[0] = $v->as_html;
 			if ($srch) {
-				my $path = $srch->subject_path($v->raw);
-				$rv .= "$h: <a\nhref=\"../s/$path.html\">";
+				$rv .= "$h: <a\nhref=\"../t/$mid_href.html\">";
 				$rv .= $v->as_html . "</a>\n";
 				next;
 			}
@@ -361,13 +370,9 @@ sub headers_to_html_header {
 
 	}
 
-	my $header_obj = $mime->header_obj;
-	my $mid = $header_obj->header_raw('Message-ID');
-	$mid = PublicInbox::Hval->new_msgid($mid);
 	$rv .= 'Message-ID: &lt;' . $mid->as_html . '&gt; ';
-	my $href = $mid->as_href;
-	$href = "../m/$href" unless $full_pfx;
-	$rv .= "(<a\nhref=\"$href.txt\">raw</a>)\n";
+	$mid_href = "../m/$mid_href" unless $full_pfx;
+	$rv .= "(<a\nhref=\"$mid_href.txt\">raw</a>)\n";
 
 	my $irt = $header_obj->header_raw('In-Reply-To');
 	if (defined $irt) {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index bbd438a..5021509 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -126,8 +126,10 @@ sub get_atom {
 sub get_index {
 	my ($ctx, $cgi, $top) = @_;
 	require PublicInbox::Feed;
+	my $srch = searcher($ctx);
 	[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
 	  [ PublicInbox::Feed->generate_html_index({
+			srch => $srch,
 			git_dir => $ctx->{git_dir},
 			listname => $ctx->{listname},
 			pi_config => $pi_config,
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/9] WWW: eliminate "top" parameter for feeds
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
  2015-08-17  3:38 ` [PATCH 1/9] feed: remove unnecessary time parameter in index state Eric Wong
  2015-08-17  3:38 ` [PATCH 2/9] favor /t/ to /s/, since subjects may change mid-thread Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 4/9] www: simplify parameter passing to feed Eric Wong
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

This parameter hasn't been used since
commit 5adf8d639e9b5abd4cbac975d70ddc0fb76541fc
("feed: dead code elimination around dropped endpoints")
---
 lib/PublicInbox/WWW.pm | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 5021509..7cbfa35 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -35,9 +35,9 @@ sub run {
 	} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
 		invalid_list(\%ctx, $1) || redirect_list_index(\%ctx, $cgi);
 	} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
-		invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi, 0);
+		invalid_list(\%ctx, $1) || get_index(\%ctx, $cgi);
 	} elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
-		invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi, 0);
+		invalid_list(\%ctx, $1) || get_atom(\%ctx, $cgi);
 
 	# single-message pages
 	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
@@ -109,7 +109,7 @@ sub invalid_list_mid {
 
 # /$LISTNAME/atom.xml                       -> Atom feed, includes replies
 sub get_atom {
-	my ($ctx, $cgi, $top) = @_;
+	my ($ctx, $cgi) = @_;
 	require PublicInbox::Feed;
 	[ 200, [ 'Content-Type' => 'application/xml' ],
 	  [ PublicInbox::Feed->generate({
@@ -117,14 +117,13 @@ sub get_atom {
 			listname => $ctx->{listname},
 			pi_config => $pi_config,
 			cgi => $cgi,
-			top => $top,
 		}) ]
 	];
 }
 
 # /$LISTNAME/?r=$GIT_COMMIT                 -> HTML only
 sub get_index {
-	my ($ctx, $cgi, $top) = @_;
+	my ($ctx, $cgi) = @_;
 	require PublicInbox::Feed;
 	my $srch = searcher($ctx);
 	[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
@@ -135,7 +134,6 @@ sub get_index {
 			pi_config => $pi_config,
 			cgi => $cgi,
 			footer => footer($ctx),
-			top => $top,
 		}) ]
 	];
 }
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/9] www: simplify parameter passing to feed
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
                   ` (2 preceding siblings ...)
  2015-08-17  3:38 ` [PATCH 3/9] WWW: eliminate "top" parameter for feeds Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 5/9] terminology: replies => followups Eric Wong
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

No need to create a new hash when we can simply reuse the
existing one.
---
 lib/PublicInbox/Feed.pm | 44 ++++++++++++++++++++++----------------------
 lib/PublicInbox/WWW.pm  | 29 +++++++++++------------------
 2 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 5a41bea..0e0b0f6 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -17,12 +17,12 @@ use constant {
 
 # main function
 sub generate {
-	my ($class, $args) = @_;
+	my ($class, $ctx) = @_;
 	require XML::Atom::SimpleFeed;
 	require POSIX;
-	my $max = $args->{max} || MAX_PER_PAGE;
+	my $max = $ctx->{max} || MAX_PER_PAGE;
 
-	my $feed_opts = get_feedopts($args);
+	my $feed_opts = get_feedopts($ctx);
 	my $addr = $feed_opts->{address};
 	$addr = $addr->[0] if ref($addr);
 	my $feed = XML::Atom::SimpleFeed->new(
@@ -37,8 +37,8 @@ sub generate {
 		updated => POSIX::strftime(DATEFMT, gmtime),
 	);
 
-	my $git = PublicInbox::GitCatFile->new($args->{git_dir});
-	each_recent_blob($args, sub {
+	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+	each_recent_blob($ctx, sub {
 		my ($add) = @_;
 		add_to_feed($feed_opts, $feed, $add, $git);
 	});
@@ -48,19 +48,19 @@ sub generate {
 }
 
 sub generate_html_index {
-	my ($class, $args) = @_;
+	my ($class, $ctx) = @_;
 	require PublicInbox::Thread;
 
-	my $max = $args->{max} || MAX_PER_PAGE;
-	my $feed_opts = get_feedopts($args);
+	my $max = $ctx->{max} || MAX_PER_PAGE;
+	my $feed_opts = get_feedopts($ctx);
 
 	my $title = $feed_opts->{description} || '';
 	$title = PublicInbox::Hval->new_oneline($title)->as_html;
 
 	my @messages;
-	my $git_dir = $args->{git_dir};
+	my $git_dir = $ctx->{git_dir};
 	my $git = PublicInbox::GitCatFile->new($git_dir);
-	my ($first, $last) = each_recent_blob($args, sub {
+	my ($first, $last) = each_recent_blob($ctx, sub {
 		mime_load_for_sort($git, $_[0], \@messages);
 	});
 	$git = undef; # destroy pipes.
@@ -76,15 +76,15 @@ sub generate_html_index {
 	$th->order(*PublicInbox::Thread::sort_ts);
 
 	# except we sort top-level messages reverse chronologically
-	my $state = [ $args->{srch}, {}, $first, 0 ];
+	my $state = [ $ctx->{srch}, {}, $first, 0 ];
 	for (PublicInbox::Thread::rsort_ts($th->rootset)) {
 		dump_msg($_, 0, \$html, $state)
 	}
 	Email::Address->purge_cache;
 
-	my $footer = nav_footer($args->{cgi}, $last, $feed_opts, $state);
+	my $footer = nav_footer($ctx->{cgi}, $last, $feed_opts, $state);
 	if ($footer) {
-		my $list_footer = $args->{footer};
+		my $list_footer = $ctx->{footer};
 		$footer .= "\n" . $list_footer if $list_footer;
 		$footer = "<hr />" . PRE_WRAP . "$footer</pre>";
 	}
@@ -115,13 +115,13 @@ sub nav_footer {
 }
 
 sub each_recent_blob {
-	my ($args, $cb) = @_;
-	my $max = $args->{max} || MAX_PER_PAGE;
+	my ($ctx, $cb) = @_;
+	my $max = $ctx->{max} || MAX_PER_PAGE;
 	my $hex = '[a-f0-9]';
 	my $addmsg = qr!^:000000 100644 \S+ \S+ A\t(${hex}{2}/${hex}{38})$!;
 	my $delmsg = qr!^:100644 000000 \S+ \S+ D\t(${hex}{2}/${hex}{38})$!;
 	my $refhex = qr/${hex}{4,40}(?:~\d+)?/;
-	my $cgi = $args->{cgi};
+	my $cgi = $ctx->{cgi};
 
 	# revision ranges may be specified
 	my $range = 'HEAD';
@@ -133,7 +133,7 @@ sub each_recent_blob {
 	# get recent messages
 	# we could use git log -z, but, we already know ssoma will not
 	# leave us with filenames with spaces in them..
-	my @cmd = ('git', "--git-dir=$args->{git_dir}",
+	my @cmd = ('git', "--git-dir=$ctx->{git_dir}",
 			qw/log --no-notes --no-color --raw -r
 			   --abbrev=16 --abbrev-commit/);
 	push @cmd, $range;
@@ -178,12 +178,12 @@ sub each_recent_blob {
 
 # private functions below
 sub get_feedopts {
-	my ($args) = @_;
-	my $pi_config = $args->{pi_config};
-	my $listname = $args->{listname};
-	my $cgi = $args->{cgi};
+	my ($ctx) = @_;
+	my $pi_config = $ctx->{pi_config};
+	my $listname = $ctx->{listname};
+	my $cgi = $ctx->{cgi};
 	my %rv;
-	if (open my $fh, '<', "$args->{git_dir}/description") {
+	if (open my $fh, '<', "$ctx->{git_dir}/description") {
 		chomp($rv{description} = <$fh>);
 		close $fh;
 	}
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 7cbfa35..be34e1c 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -111,14 +111,10 @@ sub invalid_list_mid {
 sub get_atom {
 	my ($ctx, $cgi) = @_;
 	require PublicInbox::Feed;
+	$ctx->{pi_config} = $pi_config;
+	$ctx->{cgi} = $cgi;
 	[ 200, [ 'Content-Type' => 'application/xml' ],
-	  [ PublicInbox::Feed->generate({
-			git_dir => $ctx->{git_dir},
-			listname => $ctx->{listname},
-			pi_config => $pi_config,
-			cgi => $cgi,
-		}) ]
-	];
+	  [ PublicInbox::Feed->generate($ctx) ] ]
 }
 
 # /$LISTNAME/?r=$GIT_COMMIT                 -> HTML only
@@ -126,16 +122,11 @@ sub get_index {
 	my ($ctx, $cgi) = @_;
 	require PublicInbox::Feed;
 	my $srch = searcher($ctx);
+	$ctx->{pi_config} = $pi_config;
+	$ctx->{cgi} = $cgi;
+	footer($ctx);
 	[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
-	  [ PublicInbox::Feed->generate_html_index({
-			srch => $srch,
-			git_dir => $ctx->{git_dir},
-			listname => $ctx->{listname},
-			pi_config => $pi_config,
-			cgi => $cgi,
-			footer => footer($ctx),
-		}) ]
-	];
+	  [ PublicInbox::Feed->generate_html_index($ctx) ] ]
 }
 
 # just returns a string ref for the blob in the current ctx
@@ -275,6 +266,7 @@ sub footer {
 	my $footer = try_cat("$git_dir/public-inbox/footer.html");
 	if (defined $footer) {
 		chomp $footer;
+		$ctx->{footer} = $footer;
 		return $footer;
 	}
 
@@ -304,7 +296,8 @@ sub footer {
 
 	$addr = "<a\nhref=\"mailto:$addr\">$addr</a>";
 	$desc =  $desc;
-	join("\n",
+
+	$ctx->{footer} = join("\n",
 		'- ' . $desc,
 		"A <a\nhref=\"" . PI_URL .  '">public-inbox</a>, ' .
 			'anybody may post in plain-text (not HTML):',
@@ -319,7 +312,7 @@ sub searcher {
 	my ($ctx) = @_;
 	eval {
 		require PublicInbox::Search;
-		PublicInbox::Search->new($ctx->{git_dir});
+		$ctx->{srch} = PublicInbox::Search->new($ctx->{git_dir});
 	};
 }
 
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/9] terminology: replies => followups
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
                   ` (3 preceding siblings ...)
  2015-08-17  3:38 ` [PATCH 4/9] www: simplify parameter passing to feed Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 6/9] search: use raw headers without MIME decoding Eric Wong
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

Replies are only direct replies, but followups could be any message
further down the thread.  The latter is more useful.
---
 lib/PublicInbox/Search.pm |  4 ++--
 lib/PublicInbox/View.pm   | 28 +++++++++++++++++-----------
 t/search.t                |  6 +++---
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index f4f00b2..6fd46f4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -218,8 +218,8 @@ sub get_subject_path {
 	$self->do_enquire($query);
 }
 
-# given a message ID, get replies to a message
-sub get_replies {
+# given a message ID, get followups to a message
+sub get_followups {
 	my ($self, $mid, $opts) = @_;
 	$mid = mid_clean($mid);
 	$mid = mid_compressed($mid);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4960935..4f2833b 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -133,7 +133,8 @@ sub index_entry {
 	}
 
 	if ($srch) {
-		$rv .= " <a\nhref=\"${path}t/$href.html$t_anchor\">thread</a>";
+		$rv .= " <a\nhref=\"${path}t/$href.html$t_anchor\">" .
+		       "threadlink</a>";
 	}
 
 	$rv . "\n\n";
@@ -154,7 +155,14 @@ sub thread_html {
 	my $state = [ $srch, { root_anchor => anchor_for($mid) }, undef, 0 ];
 	thread_entry(\$rv, $state, $_, 0) for $th->rootset;
 	my $final_anchor = $state->[3];
-	my $next = "<a\nid=\"s$final_anchor\">end of thread</a>\n";
+	my $next = "<a\nid=\"s$final_anchor\">";
+
+	if ($final_anchor == 1) {
+		$next .= 'only message in thread';
+	} else {
+		$next .= 'end of thread';
+	}
+	$next .= ", back to <a\nhref=\"../\">index</a>\n";
 
 	$rv .= "</pre><hr />" . PRE_WRAP . $next . $foot . "</pre>";
 }
@@ -432,14 +440,15 @@ sub html_footer {
 		$irt = $mime->header_obj->header_raw('In-Reply-To') || '';
 		$mid = mid_compressed(mid_clean($mid));
 		my $t_anchor = length $irt ? T_ANCHOR : '';
-		$idx = " <a\nhref=\"../t/$mid.html$t_anchor\">thread</a>$idx";
-		my $res = $srch->get_replies($mid);
+		$idx = " <a\nhref=\"../t/$mid.html$t_anchor\">".
+		       "threadlink</a>$idx";
+		my $res = $srch->get_followups($mid);
 		if (my $c = $res->{count}) {
-			$c = $c == 1 ? '1 reply' : "$c replies";
+			$c = $c == 1 ? '1 followup' : "$c followups";
 			$idx .= "\n$c:\n";
-			thread_replies(\$idx, $mime, $res);
+			thread_followups(\$idx, $mime, $res);
 		} else {
-			$idx .= "\n(no replies yet)\n";
+			$idx .= "\n(no followups, yet)\n";
 		}
 		if ($irt) {
 			$irt = PublicInbox::Hval->new_msgid($irt);
@@ -512,12 +521,9 @@ sub hash_subj {
 	Digest::SHA::sha1($subj);
 }
 
-sub thread_replies {
+sub thread_followups {
 	my ($dst, $root, $res) = @_;
 	my @msgs = map { $_->mini_mime } @{$res->{msgs}};
-	foreach (@{$res->{msgs}}) {
-		print STDERR "smsg->path: <", $_->path, ">\n";
-	}
 	require PublicInbox::Thread;
 	$root->header_set('X-PI-TS', '0');
 	my $th = PublicInbox::Thread->new($root, @msgs);
diff --git a/t/search.t b/t/search.t
index 7ef86cd..2bb6b6c 100644
--- a/t/search.t
+++ b/t/search.t
@@ -175,9 +175,9 @@ sub filter_mids {
 	is_deeply(\@res, [ sort('last@s', $long_midc) ],
 		  "got expected results for references: match");
 
-	my $replies = $ro->get_replies('root@s');
-	$replies = [ filter_mids($replies) ];
-	is_deeply($replies, [ filter_mids($res) ], "get_replies matches");
+	my $followups = $ro->get_followups('root@s');
+	$followups = [ filter_mids($followups) ];
+	is_deeply($followups, [ filter_mids($res) ], "get_followups matches");
 
 	my $long_reply_mid = 'reply-to-long@1';
 	my $long_reply = Email::MIME->create(
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/9] search: use raw headers without MIME decoding
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
                   ` (4 preceding siblings ...)
  2015-08-17  3:38 ` [PATCH 5/9] terminology: replies => followups Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 7/9] feed: disable the generator statement Eric Wong
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

This should be less error-prone in case somebody tries to screw with
us and our thread_id mechanism or somehow waste our resources.
Unfortunately Mail::Thread isn't smart enough for this, yet, so we
may need to downgrade to Email::Simple objects as a workaround.

Or simply not worry about the display so much if somebody is
intentionally trying to make it thread badly/incorrectly.
---
 lib/PublicInbox/Search.pm | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 6fd46f4..051f7a5 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -74,7 +74,7 @@ sub add_message {
 	my $db = $self->{xdb};
 
 	my $doc_id;
-	my $mid = mid_clean($mime->header('Message-ID'));
+	my $mid = mid_clean($mime->header_obj->header_raw('Message-ID'));
 	$mid = mid_compressed($mid);
 	my $was_ghost = 0;
 	my $ct_msg = $mime->header('Content-Type') || 'text/plain';
@@ -341,9 +341,9 @@ sub link_message_to_parents {
 	my $doc = $smsg->{doc};
 	my $mid = mid_compressed($smsg->mid);
 	my $mime = $smsg->mime;
-	my $refs = $mime->header('References');
+	my $refs = $mime->header_obj->header_raw('References');
 	my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
-	my $irt = $mime->header('In-Reply-To');
+	my $irt = $mime->header_obj->header_raw('In-Reply-To');
 	if ($irt) {
 		if ($irt =~ /<([^>]+)>/) {
 			$irt = $1;
@@ -498,7 +498,7 @@ sub index_blob {
 sub unindex_blob {
 	my ($self, $git, $blob) = @_;
 	my $mime = do_cat_mail($git, $blob) or return;
-	my $mid = $mime->header('Message-ID');
+	my $mid = $mime->header_obj->header_raw('Message-ID');
 	eval { $self->remove_message($mid) } if defined $mid;
 	warn "W: unindex_blob $blob: $@\n" if $@;
 }
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 7/9] feed: disable the generator statement
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
                   ` (5 preceding siblings ...)
  2015-08-17  3:38 ` [PATCH 6/9] search: use raw headers without MIME decoding Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 8/9] drop bodies and messages ASAP after processing Eric Wong
  2015-08-17  3:38 ` [PATCH 9/9] search: apply mid_compression to subject paths, too Eric Wong
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

No need to waste bandwidth here.
---
 lib/PublicInbox/Feed.pm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 0e0b0f6..226c50e 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -36,6 +36,7 @@ sub generate {
 		id => 'mailto:' . ($addr || 'public-inbox@example.com'),
 		updated => POSIX::strftime(DATEFMT, gmtime),
 	);
+	$feed->no_generator;
 
 	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
 	each_recent_blob($ctx, sub {
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 8/9] drop bodies and messages ASAP after processing
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
                   ` (6 preceding siblings ...)
  2015-08-17  3:38 ` [PATCH 7/9] feed: disable the generator statement Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  2015-08-17  3:38 ` [PATCH 9/9] search: apply mid_compression to subject paths, too Eric Wong
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

We can rely on reference counting to lower memory usage for
big messages.
---
 lib/PublicInbox/Feed.pm | 10 ++++++----
 lib/PublicInbox/View.pm |  2 ++
 t/view.t                | 12 +++++++-----
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 226c50e..95bde4f 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -243,23 +243,25 @@ sub add_to_feed {
 	my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
 	my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
 
-	my $mid = $mime->header_obj->header_raw('Message-ID');
+	my $header_obj = $mime->header_obj;
+	my $mid = $header_obj->header_raw('Message-ID');
 	defined $mid or return 0;
 	$mid = PublicInbox::Hval->new_msgid($mid);
 	my $href = $mid->as_href . '.html';
 	my $content = PublicInbox::View->feed_entry($mime, $fullurl . $href);
 	defined($content) or return 0;
+	$mime = undef;
 
-	my $subject = mime_header($mime, 'Subject') or return 0;
+	my $subject = mime_header($header_obj, 'Subject') or return 0;
 
-	my $from = mime_header($mime, 'From') or return 0;
+	my $from = mime_header($header_obj, 'From') or return 0;
 	my @from = Email::Address->parse($from);
 	my $name = $from[0]->name;
 	defined $name or $name = "";
 	my $email = $from[0]->address;
 	defined $email or $email = "";
 
-	my $date = $mime->header('Date');
+	my $date = $header_obj->header('Date');
 	$date = PublicInbox::Hval->new_oneline($date);
 	$date = feed_date($date->raw) or return 0;
 	$add =~ tr!/!!d;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4f2833b..55e12f2 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -116,6 +116,7 @@ sub index_entry {
 				  \$more);
 		$part_nr++;
 	});
+	$mime->body_set('');
 
 	$rv .= "\n$pfx<a\nhref=\"$mhref\">$more</a> ";
 	my $txt = "${path}m/$href.txt";
@@ -277,6 +278,7 @@ sub multipart_text_as_html {
 		$rv .= "\n" unless $rv =~ /\n\z/s;
 		++$part_nr;
 	});
+	$mime->body_set('');
 	$rv;
 }
 
diff --git a/t/view.t b/t/view.t
index 63de49b..3107285 100644
--- a/t/view.t
+++ b/t/view.t
@@ -39,9 +39,9 @@ EOF
 			Subject => 'this is a subject',
 		],
 		body => $body,
-	);
-	$s = Email::MIME->new($s->as_string);
-	my $html = PublicInbox::View->msg_html($s);
+	)->as_string;
+	my $mime = Email::MIME->new($s);
+	my $html = PublicInbox::View->msg_html($mime);
 
 	# ghetto tests
 	like($html, qr!<a\nhref="\.\./m/hello%40!s, "MID link present");
@@ -52,7 +52,8 @@ EOF
 
 	# short page
 	my $pfx = "http://example.com/test/f";
-	my $short = PublicInbox::View->msg_html($s, $pfx);
+	$mime = Email::MIME->new($s);
+	my $short = PublicInbox::View->msg_html($mime, $pfx);
 	like($short, qr!<a\nhref="hello%40!s, "MID link present");
 	like($short, qr/\n&gt; keep this inline/,
 		"short quoted text is inline");
@@ -137,8 +138,9 @@ EOF
 		parts => $parts,
 	);
 
+	my $orig = $mime->body_raw;
 	my $html = PublicInbox::View->msg_html($mime);
-	like($mime->body_raw, qr/hi =3D bye=/, "our test used QP correctly");
+	like($orig, qr/hi =3D bye=/, "our test used QP correctly");
 	like($html, qr/\bhi = bye\b/, "HTML output decoded QP");
 }
 
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 9/9] search: apply mid_compression to subject paths, too
  2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
                   ` (7 preceding siblings ...)
  2015-08-17  3:38 ` [PATCH 8/9] drop bodies and messages ASAP after processing Eric Wong
@ 2015-08-17  3:38 ` Eric Wong
  8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2015-08-17  3:38 UTC (permalink / raw)
  To: meta

Otherwise we'll be wasting space in our index for long
subjects.
---
 lib/PublicInbox/Search.pm | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 051f7a5..862ed6d 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -15,7 +15,8 @@ use constant {
 	# SCHEMA_VERSION history
 	# 0 - initial
 	# 1 - subject_path is lower-cased
-	SCHEMA_VERSION => 1,
+	# 2 - subject_path is mid_compressed in the index, only
+	SCHEMA_VERSION => 2,
 	LANG => 'english',
 	QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
 };
@@ -113,7 +114,7 @@ sub add_message {
 			$doc->add_term(xpfx('subject') . $subj);
 
 			my $path = subject_path($subj);
-			$doc->add_term(xpfx('path') . $path);
+			$doc->add_term(xpfx('path') . mid_compressed($path));
 		}
 
 		my $from = $smsg->from_name;
@@ -214,7 +215,7 @@ sub query {
 
 sub get_subject_path {
 	my ($self, $path, $opts) = @_;
-	my $query = $self->qp->parse_query("path:$path", 0);
+	my $query = $self->qp->parse_query("path:".mid_compressed($path), 0);
 	$self->do_enquire($query);
 }
 
@@ -238,7 +239,7 @@ sub get_thread {
 	return { count => 0, msgs => [] } unless $smsg;
 	my $qp = $self->qp;
 	my $qtid = $qp->parse_query('thread:'.$smsg->thread_id);
-	my $qsub = $qp->parse_query('path:'.$smsg->path);
+	my $qsub = $qp->parse_query('path:'.mid_compressed($smsg->path));
 	my $query = Search::Xapian::Query->new(OP_OR, $qtid, $qsub);
 	$self->do_enquire($query);
 }
-- 
EW


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-08-17  3:38 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2015-08-17  3:38 [PATCH 0/9] search pushed and deployed to master Eric Wong
2015-08-17  3:38 ` [PATCH 1/9] feed: remove unnecessary time parameter in index state Eric Wong
2015-08-17  3:38 ` [PATCH 2/9] favor /t/ to /s/, since subjects may change mid-thread Eric Wong
2015-08-17  3:38 ` [PATCH 3/9] WWW: eliminate "top" parameter for feeds Eric Wong
2015-08-17  3:38 ` [PATCH 4/9] www: simplify parameter passing to feed Eric Wong
2015-08-17  3:38 ` [PATCH 5/9] terminology: replies => followups Eric Wong
2015-08-17  3:38 ` [PATCH 6/9] search: use raw headers without MIME decoding Eric Wong
2015-08-17  3:38 ` [PATCH 7/9] feed: disable the generator statement Eric Wong
2015-08-17  3:38 ` [PATCH 8/9] drop bodies and messages ASAP after processing Eric Wong
2015-08-17  3:38 ` [PATCH 9/9] search: apply mid_compression to subject paths, too Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).