unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Cc: Eric Wong <e@80x24.org>
Subject: [PATCH 2/5] redo main HTML index to show nested messages
Date: Thu, 28 Aug 2014 02:47:33 +0000	[thread overview]
Message-ID: <1409194056-4735-2-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1409194056-4735-1-git-send-email-e@80x24.org>

This reduces the need for page reloads in common cases and should
improve reading speed so users do not need to open many browser
tabs.  This will hopefully increase and encourage readership.

The downsides of this are increased server processing overhead and
easier address scraping by spam bots.
---
 lib/PublicInbox/Feed.pm | 37 +++++--------------
 lib/PublicInbox/View.pm | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 4ec8e97..cf64517 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,6 +8,7 @@ use Email::MIME;
 use Date::Parse qw(strptime str2time);
 use PublicInbox::Hval;
 use PublicInbox::GitCatFile;
+use PublicInbox::View;
 use constant {
 	DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
 	MAX_PER_PAGE => 25, # this needs to be tunable
@@ -18,7 +19,6 @@ use constant {
 sub generate {
 	my ($class, $args) = @_;
 	require XML::Atom::SimpleFeed;
-	require PublicInbox::View;
 	require POSIX;
 	my $max = $args->{max} || MAX_PER_PAGE;
 
@@ -61,7 +61,6 @@ sub generate_html_index {
 	my $git = PublicInbox::GitCatFile->new($args->{git_dir});
 	my $last = each_recent_blob($args, sub {
 		my $mime = do_cat_mail($git, $_[0]) or return 0;
-		$mime->body_set(''); # save some memory
 
 		my $t = eval { str2time($mime->header('Date')) };
 		defined($t) or $t = 0;
@@ -85,7 +84,8 @@ sub generate_html_index {
 			$a->topmost->message->header('X-PI-Date')
 		} @_;
 	});
-	dump_html_line($_, 0, \$html, time) for $th->rootset;
+	my %seen;
+	dump_msg($_, 0, \$html, time, \%seen) for $th->rootset;
 
 	Email::Address->purge_cache;
 
@@ -277,34 +277,15 @@ sub add_to_feed {
 	1;
 }
 
-sub dump_html_line {
-	my ($self, $level, $html, $now) = @_;
+sub dump_msg {
+	my ($self, $level, $html, $now, $seen) = @_;
 	if ($self->message) {
 		my $mime = $self->message;
-		my $subj = $mime->header('Subject');
-		my $ts = $mime->header('X-PI-Date');
-		my $mid = $mime->header_obj->header_raw('Message-ID');
-		$mid = PublicInbox::Hval->new_msgid($mid);
-		my $href = 'm/' . $mid->as_href . '.html';
-		my $from = mime_header($mime, 'From');
-
-		my @from = Email::Address->parse($from);
-		$from = $from[0]->name;
-		(defined($from) && length($from)) or $from = $from[0]->address;
-
-		$from = PublicInbox::Hval->new_oneline($from)->as_html;
-		$subj = PublicInbox::Hval->new_oneline($subj)->as_html;
-		if ($now > ($ts + (24 * 60 * 60))) {
-			$ts = POSIX::strftime('%m/%d ', gmtime($ts));
-		} else {
-			$ts = POSIX::strftime('%H:%M ', gmtime($ts));
-		}
-
-		$$html .= $ts . (' ' x $level);
-		$$html .= "<a href=\"$href\">$subj</a> $from\n";
+		$$html .=
+		    PublicInbox::View->index_entry($mime, $now, $level, $seen);
 	}
-	dump_html_line($self->child, $level+1, $html, $now) if $self->child;
-	dump_html_line($self->next, $level, $html, $now) if $self->next;
+	dump_msg($self->child, $level+1, $html, $now, $seen) if $self->child;
+	dump_msg($self->next, $level, $html, $now, $seen) if $self->next;
 }
 
 sub do_cat_mail {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index ab607a0..0d97428 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -8,6 +8,7 @@ use URI::Escape qw/uri_escape_utf8/;
 use Encode qw/find_encoding/;
 use Encode::MIME::Header;
 use Email::MIME::ContentType qw/parse_content_type/;
+require POSIX;
 
 # TODO: make these constants tunable
 use constant MAX_INLINE_QUOTED => 5;
@@ -40,6 +41,92 @@ sub feed_entry {
 	PRE_WRAP . multipart_text_as_html($mime, $full_pfx) . '</pre>';
 }
 
+# this is already inside a <pre>
+sub index_entry {
+	my ($class, $mime, $now, $level, $seen) = @_;
+	my $rv = "";
+	my $part_nr = 0;
+	my $enc_msg = enc_for($mime->header("Content-Type"));
+	my $subj = $mime->header('Subject');
+	my $header_obj = $mime->header_obj;
+
+	my $mid_raw = $header_obj->header_raw('Message-ID');
+	my $name = anchor_for($mid_raw);
+	$seen->{$name} = "#$name"; # save the anchor for later
+
+	my $mid = PublicInbox::Hval->new_msgid($mid_raw);
+	my $from = PublicInbox::Hval->new_oneline($mime->header('From'))->raw;
+	my @from = Email::Address->parse($from);
+	$from = $from[0]->name;
+	(defined($from) && length($from)) or $from = $from[0]->address;
+
+	$from = PublicInbox::Hval->new_oneline($from)->as_html;
+	$subj = PublicInbox::Hval->new_oneline($subj)->as_html;
+	my $pfx = ('  ' x $level);
+
+	my $ts = $mime->header('X-PI-Date');
+	my $fmt = '%H:%M';
+	if ($now > ($ts + (365 * 24 * 60 * 60))) {
+		# doesn't have to be exactly 1 year
+		$fmt = '%Y/%m/%d';
+	} elsif ($now > ($ts + (24 * 60 * 60))) {
+		$fmt = '%m/%d';
+	}
+	$ts = POSIX::strftime($fmt, gmtime($ts));
+
+	$rv .= "$pfx<a name=\"$name\"><b>$subj</b> $from - $ts</a>\n\n";
+
+	# scan through all parts, looking for displayable text
+	$mime->walk_parts(sub {
+		my ($part) = @_;
+		return if $part->subparts; # walk_parts already recurses
+		my $enc = enc_for($part->content_type) || $enc_msg || $enc_utf8;
+
+		if ($part_nr > 0) {
+			my $fn = $part->filename;
+			defined($fn) or $fn = "part #" . ($part_nr + 1);
+			$rv .= $pfx . add_filename_line($enc->decode($fn));
+		}
+
+		my $s = ascii_html($enc->decode($part->body));
+
+		# drop quotes, including the "so-and-so wrote:" line
+		$s =~ s/(?:^[^\n]*:\s*\n)?(?:^&gt;[^\n]*\n)+(?:^\s*\n)?//mg;
+
+		# Drop signatures
+		$s =~ s/\n*-- \n.*\z//s;
+
+		# kill any trailing whitespace
+		$s =~ s/\s+\z//s;
+
+		# add prefix:
+		$s =~ s/^/$pfx/sgm;
+
+		$rv .= $s . "\n";
+		++$part_nr;
+	});
+
+	my $href = 'm/' . $mid->as_href . '.html';
+	$rv .= "$pfx<a\nhref=\"$href\">more</a> ";
+	my $txt = 'm/' . $mid->as_href . '.txt';
+	$rv .= "<a\nhref=\"$txt\">raw</a> ";
+	$rv .= html_footer($mime, 0);
+
+	my $irp = $header_obj->header_raw('In-Reply-To');
+	if (defined $irp) {
+		my $anchor_idx = anchor_for($irp);
+		my $anchor = $seen->{$anchor_idx};
+		unless (defined $anchor) {
+			my $v = PublicInbox::Hval->new_msgid($irp);
+			my $html = $v->as_html;
+			$anchor = 'm/' . $v->as_href . '.html';
+			$seen->{$anchor_idx} = $anchor;
+		}
+		$rv .= " <a\nhref=\"$anchor\">parent</a>";
+	}
+
+	$rv . "\n\n";
+}
 
 # only private functions below.
 
@@ -232,7 +319,7 @@ sub html_footer {
 	my $cc = uri_escape_utf8(join(',', values %cc));
 	my $href = "mailto:$to?In-Reply-To=$irp&Cc=${cc}&Subject=$subj";
 
-	'<a href="' . ascii_html($href) . '">reply</a>';
+	"<a\nhref=\"" . ascii_html($href) . '">reply</a>';
 }
 
 sub linkify_refs {
@@ -244,4 +331,12 @@ sub linkify_refs {
 	} @_);
 }
 
+require Digest::SHA;
+sub anchor_for {
+	my ($msgid) = @_;
+	$msgid =~ s/\A\s*<?//;
+	$msgid =~ s/>?\s*\z//;
+	Digest::SHA::sha1_hex($msgid);
+}
+
 1;
-- 
EW


  reply	other threads:[~2014-08-28  2:47 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-28  2:47 [PATCH 1/5] view: Email::Address cache purge is optional Eric Wong
2014-08-28  2:47 ` Eric Wong [this message]
2014-08-28  2:47 ` [PATCH 3/5] view: increase MAX_INLINE_QUOTED threshold to 12 Eric Wong
2014-08-28  2:47 ` [PATCH 4/5] feed: show permalink to home page Eric Wong
2014-08-28  2:47 ` [PATCH 5/5] feed: deal with removed files Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409194056-4735-2-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).