unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/7] index: use message threading if search is available
Date: Fri,  4 Sep 2015 02:18:06 +0000	[thread overview]
Message-ID: <1441333091-32421-3-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1441333091-32421-1-git-send-email-e@80x24.org>

This lets us merge topics that have different subjects but share a
common parent (common in "[PATCH 0/X]" threads).  This also lets us
avoid forking for the HTML index page.
---
 lib/PublicInbox/Feed.pm | 124 +++++++++++++-----------------------------------
 lib/PublicInbox/View.pm | 110 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 142 insertions(+), 92 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 75fecf5..8fcb8d8 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -123,6 +123,8 @@ sub emit_html_index {
 	my $title = $feed_opts->{description} || '';
 	$title = PublicInbox::Hval->new_oneline($title)->as_html;
 	my $atom_url = $feed_opts->{atomurl};
+	my ($footer, $param, $last, $srch);
+	my $state = { ctx => $ctx, seen => {}, anchor_idx => 0 };
 
 	$fh->write("<html><head><title>$title</title>" .
 		   "<link\nrel=alternate\ntitle=\"Atom feed\"\n".
@@ -130,61 +132,60 @@ sub emit_html_index {
 		   '</head><body>' . PublicInbox::View::PRE_WRAP .
 		   "<b>$title</b> (<a\nhref=\"$atom_url\">Atom feed</a>)\n");
 
-	my $state;
-	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
-	my $topics;
-	my $srch = $ctx->{srch};
-	$srch and $topics = [ [], {} ];
-	my (undef, $last) = each_recent_blob($ctx, sub {
-		my ($path, $commit, $ts, $u, $subj) = @_;
-		$state ||= {
-			ctx => $ctx,
-			seen => {},
-			first_commit => $commit,
-			anchor_idx => 0,
-		};
-
-		if ($srch) {
-			add_topic($git, $srch, $topics, $path, $ts, $u, $subj);
-		} else {
-			my $mime = do_cat_mail($git, $path) or return 0;
-			PublicInbox::View::index_entry($fh, $mime, 0, $state);
-			1;
-		}
-	});
-	Email::Address->purge_cache;
-	$git = undef; # destroy pipes.
-
-	my $footer = nav_footer($ctx->{cgi}, $last, $feed_opts, $state);
+	# if the 'r' query parameter is given, it is a legacy permalink
+	# which we must continue supporting:
+	my $cgi = $ctx->{cgi};
+	if ($cgi && !$cgi->param('r') && ($srch = $ctx->{srch})) {
+		$state->{srch} = $srch;
+		$last = PublicInbox::View::emit_index_topics($state, $fh);
+		$param = 'o';
+	} else {
+		$last = emit_index_nosrch($ctx, $state, $fh);
+		$param = 'r';
+	}
+	$footer = nav_footer($cgi, $last, $feed_opts, $state, $param);
 	if ($footer) {
 		my $list_footer = $ctx->{footer};
 		$footer .= "\n\n" . $list_footer if $list_footer;
 		$footer = "<hr /><pre>$footer</pre>";
 	}
-	$fh->write(dump_topics($topics)) if $topics;
 	$fh->write("$footer</body></html>");
 	$fh->close;
 }
 
+sub emit_index_nosrch {
+	my ($ctx, $state, $fh) = @_;
+	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+	my (undef, $last) = each_recent_blob($ctx, sub {
+		my ($path, $commit, $ts, $u, $subj) = @_;
+		$state->{first} ||= $commit;
+
+		my $mime = do_cat_mail($git, $path) or return 0;
+		PublicInbox::View::index_entry($fh, $mime, 0, $state);
+		1;
+	});
+	Email::Address->purge_cache;
+	$last;
+}
+
 sub nav_footer {
-	my ($cgi, $last, $feed_opts, $state) = @_;
+	my ($cgi, $last, $feed_opts, $state, $param) = @_;
 	$cgi or return '';
-	my $old_r = $cgi->param('r');
+	my $old_r = $cgi->param($param);
 	my $head = '    ';
 	my $next = '    ';
-	my $first = $state->{first_commit};
+	my $first = $state->{first};
 	my $anchor = $state->{anchor_idx};
 
 	if ($last) {
-		$next = qq!<a\nhref="?r=$last">next</a>!;
+		$next = qq!<a\nhref="?$param=$last">next</a>!;
 	}
 	if ($old_r) {
 		$head = $cgi->path_info;
 		$head = qq!<a\nhref="$head">head</a>!;
 	}
 	my $atom = "<a\nhref=\"$feed_opts->{atomurl}\">atom</a>";
-	my $permalink = "<a\nhref=\"?r=$first\">permalink</a>";
-	"<a\nname=\"s$anchor\">page:</a> $next $head $atom $permalink";
+	"<a\nname=\"s$anchor\">page:</a> $next $head $atom";
 }
 
 sub each_recent_blob {
@@ -369,61 +370,4 @@ sub do_cat_mail {
 	$@ ? undef : $mime;
 }
 
-# accumulate recent topics if search is supported
-sub add_topic {
-	my ($git, $srch, $topics, $path, $ts, $u, $subj) = @_;
-	my ($order, $subjs) = @$topics;
-	my $header_obj;
-
-	# legacy ssoma did not set commit titles based on Subject
-	$subj = $enc_utf8->decode($subj);
-	if ($subj eq 'mda') {
-		my $mime = do_cat_mail($git, $path) or return 0;
-		$header_obj = $mime->header_obj;
-		$subj = mime_header($header_obj, 'Subject');
-	}
-
-	my $topic = $subj = $srch->subject_normalized($subj);
-
-	# kill "[PATCH v2]" etc. for summarization
-	$topic =~ s/\A\s*\[[^\]]+\]\s*//g;
-
-	if (++$subjs->{$topic} == 1) {
-		unless ($header_obj) {
-			my $mime = do_cat_mail($git, $path) or return 0;
-			$header_obj = $mime->header_obj;
-		}
-		my $mid = mid_clean($header_obj->header('Message-ID'));
-		$u = $enc_utf8->decode($u);
-		push @$order, [ $mid, $ts, $u, $subj, $topic ];
-		return 1;
-	}
-	0; # old topic, continue going
-}
-
-sub dump_topics {
-	my ($topics) = @_;
-	my ($order, $subjs) = @$topics;
-	my $dst = '';
-	$dst .= "\n[No recent topics]" unless (scalar @$order);
-	while (defined(my $info = shift @$order)) {
-		my ($mid, $ts, $u, $subj, $topic) = @$info;
-		my $n = delete $subjs->{$topic};
-		$mid = PublicInbox::Hval->new($mid)->as_href;
-		$subj = PublicInbox::Hval->new($subj)->as_html;
-		$u = PublicInbox::Hval->new($u)->as_html;
-		$dst .= "\n<a\nhref=\"$mid/t/#u\"><b>$subj</b></a>\n- ";
-		$ts = strftime('%Y-%m-%d %H:%M', gmtime($ts));
-		if ($n == 1) {
-			$dst .= "created by $u @ $ts UTC\n"
-		} else {
-			# $n isn't the total number of posts on the topic,
-			# just the number of posts in the current "git log"
-			# window, so leave it unlabeled
-			$dst .= "updated by $u @ $ts UTC ($n)\n"
-		}
-	}
-	$dst .= '</pre>'
-}
-
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 129aa89..ee5ba20 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -656,11 +656,12 @@ sub msg_timestamp {
 }
 
 sub thread_results {
-	my ($msgs) = @_;
+	my ($msgs, $nosubject) = @_;
 	require PublicInbox::Thread;
 	my $th = PublicInbox::Thread->new(@$msgs);
-	$th->thread;
 	no warnings 'once';
+	$Mail::Thread::nosubject = $nosubject;
+	$th->thread;
 	$th->order(*PublicInbox::Thread::sort_ts);
 	$th
 }
@@ -746,4 +747,109 @@ sub inline_dump {
 	inline_dump($dst, $state, $upfx, $node->next, $level);
 }
 
+sub rsort_ts {
+	sort {
+		(eval { $b->topmost->message->header('X-PI-TS') } || 0) <=>
+		(eval { $a->topmost->message->header('X-PI-TS') } || 0)
+	} @_;
+}
+
+# accumulate recent topics if search is supported
+# walks the thread tree recursively; the return value is not used
+sub add_topic {
+	my ($state, $node, $level) = @_;
+	return unless $node;
+
+	if (my $x = $node->message) {
+		$x = $x->header_obj;
+		my ($topic, $subj);
+
+		$subj = $x->header('Subject');
+		$subj = $state->{srch}->subject_normalized($subj);
+		$topic = $subj;
+
+		# kill "[PATCH v2]" etc. for summarization
+		$topic =~ s/\A\s*\[[^\]]+\]\s*//g;
+		$topic = substr($topic, 0, 30);
+
+		if (++$state->{subjs}->{$topic} == 1) {
+			push @{$state->{order}}, [ $level, $subj, $topic ];
+		}
+
+		my $mid = mid_clean($x->header('Message-ID'));
+
+		my $u = $x->header('X-PI-From');
+		my $ts = $x->header('X-PI-TS');
+		$state->{latest}->{$topic} = [ $mid, $u, $ts ];
+	} # else { } # ghost ignored...
+
+	add_topic($state, $node->child, $level + 1);
+	add_topic($state, $node->next, $level);
+}
+
+sub dump_topics {
+	my ($state) = @_;
+	my $order = $state->{order};
+	my $subjs = $state->{subjs};
+	my $latest = $state->{latest};
+	return "\n[No recent topics]</pre>" unless (scalar @$order);
+	my $dst = '';
+	my $pfx;
+	my $prev = 0;
+	my $prev_attr = '';
+	while (defined(my $info = shift @$order)) {
+		my ($level, $subj, $topic) = @$info;
+		my $n = delete $subjs->{$topic};
+		my ($mid, $u, $ts) = @{delete $latest->{$topic}};
+		$mid = PublicInbox::Hval->new($mid)->as_href;
+		$subj = PublicInbox::Hval->new($subj)->as_html;
+		$u = PublicInbox::Hval->new($u)->as_html;
+		$pfx = INDENT x $level;
+		my $nl = $level == $prev ? "\n" : '';
+		my $dot = $level == 0 ? '' : '`';
+		$dst .= "$nl$pfx$dot<a\nhref=\"$mid/t/#u\"><b>$subj</b></a>\n";
+
+		my $attr;
+		$ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+		if ($n == 1) {
+			$attr = "created by $u @ $ts UTC";
+			$n = "\n";
+		} else {
+			# $n isn't the total number of posts on the topic,
+			# just the number of posts in the current results
+			# window, so leave it unlabeled
+			$attr = "updated by $u @ $ts UTC";
+			$n = " ($n)\n";
+		}
+		if ($level == 0 || $attr ne $prev_attr) {
+			$dst .= "$pfx - ". $attr . $n;
+			$prev_attr = $attr;
+		}
+	}
+	$dst .= '</pre>';
+}
+
+sub emit_index_topics {
+	my ($state, $fh) = @_;
+	my $off = $state->{ctx}->{cgi}->param('o');
+	$off = 0 unless defined $off;
+	$state->{order} = [];
+	$state->{subjs} = {};
+	$state->{latest} = {};
+	my $max = 25;
+	my %opts = ( offset => int $off, limit => $max * 4 );
+	while (scalar @{$state->{order}} < $max) {
+		my $res = $state->{srch}->query('', \%opts);
+		my $nr = scalar @{$res->{msgs}} or last;
+
+		for (rsort_ts(thread_results(load_results($res), 1)->rootset)) {
+			add_topic($state, $_, 0);
+		}
+		$opts{offset} += $nr;
+	}
+
+	$fh->write(dump_topics($state));
+	$opts{offset};
+}
+
 1;
-- 
EW


  parent reply	other threads:[~2015-09-04  2:18 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-04  2:18 [PATCH 0/7] new index and misc fixes Eric Wong
2015-09-04  2:18 ` [PATCH 1/7] doc: design_www: more accessibility guidelines Eric Wong
2015-09-04  2:18 ` Eric Wong [this message]
2015-09-04  2:18 ` [PATCH 3/7] consolidate thread sorting in view Eric Wong
2015-09-04  2:18 ` [PATCH 4/7] extmsg: close HTML tag in response Eric Wong
2015-09-04  2:18 ` [PATCH 5/7] view: avoid attempting to find "subject dummy" Eric Wong
2015-09-04  2:18 ` [PATCH 6/7] SearchMsg: avoid encoding Message-IDs Eric Wong
2015-09-04  2:37   ` Eric Wong
2015-09-04  2:18 ` [PATCH 7/7] view: do not generate anchors in thread views Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1441333091-32421-3-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).