[PATCH 01/11] search: reduce redundant doc data

unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed

* [PATCH 01/11] search: reduce redundant doc data
@ 2015-09-01  8:55 Eric Wong
  2015-09-01  8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
                   ` (9 more replies)
  0 siblings, 10 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

Redundant document data increases our database size, so pull the
smsg->mid off the unique term, the smsg->ts off the value, and
only generate the formatted display date off smsg->ts.
---
 lib/PublicInbox/Search.pm    |  7 ++++---
 lib/PublicInbox/SearchIdx.pm |  2 --
 lib/PublicInbox/SearchMsg.pm | 42 ++++++++++++++++++++++--------------------
 lib/PublicInbox/View.pm      | 15 ++++++++-------
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index d3faaeb..b7b215f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -4,8 +4,9 @@
 package PublicInbox::Search;
 use strict;
 use warnings;
-use PublicInbox::SearchMsg;
+use constant TS => 0;
 use Search::Xapian qw/:standard/;
+use PublicInbox::SearchMsg;
 use Email::MIME;
 use PublicInbox::MID qw/mid_clean mid_compress/;
 
@@ -15,7 +16,6 @@ our $REPLY_RE = qr/^re:\s+/i;
 our $LANG = 'english';
 
 use constant {
-	TS => 0,
 	# SCHEMA_VERSION history
 	# 0 - initial
 	# 1 - subject_path is lower-cased
@@ -25,7 +25,8 @@ use constant {
 	# 5 - subject_path drops trailing '.'
 	# 6 - preserve References: order in document data
 	# 7 - remove references and inreplyto terms
-	SCHEMA_VERSION => 7,
+	# 8 - remove redundant/unneeded document data
+	SCHEMA_VERSION => 8,
 	QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
 };
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index dec3333..32e0714 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -81,8 +81,6 @@ sub add_message {
 			$doc->add_term(xpfx('path') . mid_compress($path));
 		}
 
-		my $from = $smsg->from_name;
-		my $date = $smsg->date;
 		my $ts = Search::Xapian::sortable_serialise($smsg->ts);
 		$doc->add_value(PublicInbox::Search::TS, $ts);
 
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 4ad8a0c..1821b07 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -13,6 +13,7 @@ use PublicInbox::MID qw/mid_clean mid_compress/;
 use Encode qw/find_encoding/;
 my $enc_utf8 = find_encoding('UTF-8');
 our $PFX2TERM_RE = undef;
+use constant EPOCH_822 => 'Thu, 01 Jan 1970 00:00:00 +0000';
 
 sub new {
 	my ($class, $mime) = @_;
@@ -30,13 +31,17 @@ sub wrap {
 sub load_doc {
 	my ($class, $doc) = @_;
 	my $data = $doc->get_data;
+	my $ts = eval {
+		no strict 'subs';
+		$doc->get_value(PublicInbox::Search::TS);
+	};
+	$ts = Search::Xapian::sortable_unserialise($ts);
 	$data = $enc_utf8->decode($data);
-	my ($mid, $subj, $from, $date, $refs) = split(/\n/, $data);
+	my ($subj, $from, $refs) = split(/\n/, $data);
 	bless {
 		doc => $doc,
-		mid => $mid,
 		subject => $subj,
-		date => $date,
+		ts => $ts,
 		from_name => $from,
 		references_sorted => $refs,
 	}, $class;
@@ -77,27 +82,13 @@ sub from_name {
 
 sub ts {
 	my ($self) = @_;
-	my $ts = $self->{ts};
-	return $ts if $ts;
-	$self->{ts} = eval {
-		str2time($self->date || $self->mime->header('Date'))
-	} || 0;
-}
-
-sub date {
-	my ($self) = @_;
-	my $date = $self->{date};
-	return $date if $date;
-	my $ts = eval { str2time($self->mime->header('Date')) };
-	$self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+	$self->{ts} ||= eval { str2time($self->mime->header('Date')) } || 0;
 }
 
 sub to_doc_data {
 	my ($self) = @_;
-	$self->mid . "\n" .
 	PublicInbox::Search::subject_summary($self->subject) . "\n" .
 	$self->from_name . "\n".
-	$self->date . "\n" .
 	$self->references_sorted;
 }
 
@@ -139,14 +130,23 @@ sub mini_mime {
 	my @h = (
 		Subject => $self->subject,
 		'X-PI-From' => $self->from_name,
-		'X-PI-Date' => $self->date,
 		'X-PI-TS' => $self->ts,
 		'Message-ID' => "<$self->{mid}>",
+
+		# prevent Email::Simple::Creator from running,
+		# this header is useless for threading as we use X-PI-TS
+		# for sorting and display:
+		'Date' => EPOCH_822,
 	);
 
 	my $refs = $self->{references_sorted};
 	my $mime = Email::MIME->create(header_str => \@h);
-	$mime->header_set('References', $refs) if (defined $refs);
+	my $h = $mime->header_obj;
+	$h->header_set('References', $refs) if (defined $refs);
+
+	# drop useless headers Email::MIME set for us
+	$h->header_set('Date');
+	$h->header_set('MIME-Version');
 	$mime;
 }
 
@@ -155,6 +155,8 @@ sub mid {
 
 	if (defined $mid) {
 		$self->{mid} = $mid;
+	} elsif (my $rv = $self->{mid}) {
+		$rv;
 	} else {
 		$self->ensure_metadata; # needed for ghosts
 		$self->{mid} ||= $self->_extract_mid;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 584a2d7..477c4b6 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -86,12 +86,7 @@ sub index_entry {
 		$subj = "<u\nid=\"u\">$subj</u>";
 	}
 
-	my $ts = $mime->header('X-PI-TS');
-	unless (defined $ts) {
-		$ts = msg_timestamp($mime);
-	}
-	$ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
-
+	my $ts = _msg_date($mime);
 	my $rv = "<table\nsummary=l$level><tr>";
 	if ($level) {
 		$rv .= '<td><pre>' . ('  ' x $level) . '</pre></td>';
@@ -561,6 +556,12 @@ sub missing_thread {
 EOF
 }
 
+sub _msg_date {
+	my ($mime) = @_;
+	my $ts = $mime->header('X-PI-TS') || msg_timestamp($mime);
+	POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+}
+
 sub _inline_header {
 	my ($dst, $state, $mime, $level) = @_;
 	my $pfx = '  ' x $level;
@@ -568,7 +569,7 @@ sub _inline_header {
 	my $cur = $state->{cur};
 	my $mid = $mime->header('Message-ID');
 	my $f = $mime->header('X-PI-From');
-	my $d = $mime->header('X-PI-Date');
+	my $d = _msg_date($mime);
 	$f = PublicInbox::Hval->new($f);
 	$d = PublicInbox::Hval->new($d);
 	$f = $f->as_html;
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 02/11] search: allow querying all mail with ''
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 03/11] search: show newest results first Eric Wong
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

This makes dumping recent topics easier, hopefully.
---
 lib/PublicInbox/Search.pm | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b7b215f..831c4fd 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -74,10 +74,14 @@ sub reopen { $_[0]->{xdb}->reopen }
 # read-only
 sub query {
 	my ($self, $query_string, $opts) = @_;
-	my $query = $self->qp->parse_query($query_string, QP_FLAGS);
+	my $query;
 
 	$opts ||= {};
-	$opts->{relevance} = 1;
+	unless ($query_string eq '') {
+		$query = $self->qp->parse_query($query_string, QP_FLAGS);
+		$opts->{relevance} = 1;
+	}
+
 	$self->do_enquire($query, $opts);
 }
 
@@ -104,8 +108,11 @@ sub get_thread {
 sub do_enquire {
 	my ($self, $query, $opts) = @_;
 	my $enquire = $self->enquire;
-
-	$query = Search::Xapian::Query->new(OP_AND, $query, $mail_query);
+	if (defined $query) {
+		$query = Search::Xapian::Query->new(OP_AND,$query,$mail_query);
+	} else {
+		$query = $mail_query;
+	}
 	$enquire->set_query($query);
 	if ($opts->{relevance}) {
 		$enquire->set_sort_by_relevance_then_value(TS, 0);
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 03/11] search: show newest results first
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
  2015-09-01  8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 04/11] feed: use updated date based on git commit date Eric Wong
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

Like revision control history, older stuff is less relevant,
so favor newer stuff first.
---
 lib/PublicInbox/Search.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 831c4fd..8b32ef3 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -115,9 +115,9 @@ sub do_enquire {
 	}
 	$enquire->set_query($query);
 	if ($opts->{relevance}) {
-		$enquire->set_sort_by_relevance_then_value(TS, 0);
+		$enquire->set_sort_by_relevance_then_value(TS, 1);
 	} else {
-		$enquire->set_sort_by_value(TS, 0);
+		$enquire->set_sort_by_value(TS, 1);
 	}
 	$opts ||= {};
 	my $offset = $opts->{offset} || 0;
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 04/11] feed: use updated date based on git commit date
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
  2015-09-01  8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
  2015-09-01  8:55 ` [PATCH 03/11] search: show newest results first Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 05/11] feed: extract atom header generation Eric Wong
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

This will hopefully make life easier for feed readers.
---
 lib/PublicInbox/Feed.pm | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index bc76cdc..71042d7 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -10,6 +10,7 @@ use PublicInbox::Hval;
 use PublicInbox::GitCatFile;
 use PublicInbox::View;
 use PublicInbox::MID qw/mid_clean mid_compress/;
+use POSIX qw/strftime/;
 use constant {
 	DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
 	MAX_PER_PAGE => 25, # this needs to be tunable
@@ -33,7 +34,6 @@ sub generate_html_index {
 
 sub emit_atom {
 	my ($cb, $ctx) = @_;
-	require POSIX;
 	my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]);
 	my $max = $ctx->{max} || MAX_PER_PAGE;
 	my $feed_opts = get_feedopts($ctx);
@@ -45,18 +45,23 @@ sub emit_atom {
 	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
 	my $url = $feed_opts->{url} || "http://example.com/";
 	my $atomurl = $feed_opts->{atomurl};
-	$fh->write(qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+	my $x = qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
 		qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
 		qq{<title$type>$title</title>} .
 		qq{<link\nhref="$url"/>} .
 		qq{<link\nrel="self"\nhref="$atomurl"/>} .
-		qq{<id>mailto:$addr</id>} .
-		'<updated>' . POSIX::strftime(DATEFMT, gmtime) . '</updated>');
+		qq{<id>mailto:$addr</id>};
 
 	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
 	each_recent_blob($ctx, sub {
-		my ($add, undef) = @_;
-		add_to_feed($feed_opts, $fh, $add, $git);
+		my ($path, undef, $ts) = @_;
+		if (defined $x) {
+			$fh->write($x . '<updated>'.
+					strftime(DATEFMT, gmtime($ts)) .
+					'</updated>');
+			$x = undef;
+		}
+		add_to_feed($feed_opts, $fh, $path, $git);
 	});
 	$git = undef; # destroy pipes
 	Email::Address->purge_cache;
@@ -259,7 +264,7 @@ sub feed_date {
 	my ($date) = @_;
 	my @t = eval { strptime($date) };
 
-	scalar(@t) ? POSIX::strftime(DATEFMT, @t) : 0;
+	scalar(@t) ? strftime(DATEFMT, @t) : 0;
 }
 
 # returns 0 (skipped) or 1 (added)
@@ -363,7 +368,7 @@ sub dump_topics {
 		$subj = PublicInbox::Hval->new($subj)->as_html;
 		$u = PublicInbox::Hval->new($u)->as_html;
 		$dst .= "\n<a\nhref=\"t/$mid/#u\"><b>$subj</b></a>\n- ";
-		$ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+		$ts = strftime('%Y-%m-%d %H:%M', gmtime($ts));
 		if ($n == 1) {
 			$dst .= "created by $u @ $ts UTC\n"
 		} else {
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 05/11] feed: extract atom header generation
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (2 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 04/11] feed: use updated date based on git commit date Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

We'll be using it for per-thread subscriptions.
---
 lib/PublicInbox/Feed.pm | 41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 71042d7..3540e9a 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -32,26 +32,26 @@ sub generate_html_index {
 
 # private subs
 
+sub atom_header {
+	my ($feed_opts) = @_;
+	my $title = $feed_opts->{description};
+	$title = PublicInbox::Hval->new_oneline($title)->as_html;
+	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+
+	qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+	qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
+	qq{<title$type>$title</title>} .
+	qq(<link\nhref="$feed_opts->{url}"/>) .
+	qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) .
+	qq(<id>mailto:$feed_opts->{id_addr}</id>);
+}
+
 sub emit_atom {
 	my ($cb, $ctx) = @_;
 	my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]);
 	my $max = $ctx->{max} || MAX_PER_PAGE;
 	my $feed_opts = get_feedopts($ctx);
-	my $addr = $feed_opts->{address};
-	$addr = $addr->[0] if ref($addr);
-	$addr ||= 'public-inbox@example.com';
-	my $title = $feed_opts->{description} || "unnamed feed";
-	$title = PublicInbox::Hval->new_oneline($title)->as_html;
-	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
-	my $url = $feed_opts->{url} || "http://example.com/";
-	my $atomurl = $feed_opts->{atomurl};
-	my $x = qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
-		qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
-		qq{<title$type>$title</title>} .
-		qq{<link\nhref="$url"/>} .
-		qq{<link\nrel="self"\nhref="$atomurl"/>} .
-		qq{<id>mailto:$addr</id>};
-
+	my $x = atom_header($feed_opts);
 	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
 	each_recent_blob($ctx, sub {
 		my ($path, undef, $ts) = @_;
@@ -219,13 +219,18 @@ sub get_feedopts {
 	if (open my $fh, '<', "$ctx->{git_dir}/description") {
 		chomp($rv{description} = <$fh>);
 		close $fh;
+	} else {
+		$rv{description} = '($GIT_DIR/description missing)';
 	}
 
 	if ($pi_config && defined $listname && $listname ne '') {
-		foreach my $key (qw(address)) {
-			$rv{$key} = $pi_config->get($listname, $key) || "";
-		}
+		my $addr = $pi_config->get($listname, 'address') || "";
+		$rv{address} = $addr;
+		$addr = $addr->[0] if ref($addr);
+		$rv{id_addr} = $addr;
 	}
+	$rv{id_addr} ||= 'public-inbox@example.com';
+
 	my $url_base;
 	if ($cgi) {
 		my $path_info = $cgi->path_info;
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 06/11] implement per-thread Atom feeds
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (3 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 05/11] feed: extract atom header generation Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  9:30   ` [13/11 PATCH] feed: fix <updated> tag in Atom feed Eric Wong
  2015-09-01  8:55 ` [PATCH 07/11] www: compile mbox regexp only once Eric Wong
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

This allows users to subscribe to only a single thread
with their feed reader without subscribing to the rest of
the list.

Update our endpoint notes while we're at it.
---
 Documentation/design_www.txt | 31 ++++++++-------
 lib/PublicInbox/Feed.pm      | 92 ++++++++++++++++++++++++++++++++------------
 lib/PublicInbox/View.pm      |  3 +-
 lib/PublicInbox/WWW.pm       | 13 +++++++
 t/cgi.t                      | 12 ++++++
 5 files changed, 112 insertions(+), 39 deletions(-)

diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index 55e9268..d25afca 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -6,25 +6,30 @@ URL naming
 /$LISTNAME/atom.xml                             -> Atom feed
 
 #### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID.html                   -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/                       -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/atom                   -> Atom feed for thread
+/$LISTNAME/t/$MESSAGE_ID/mbox.gz                -> gzipped mbox of thread
 
 ### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID.html                   -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID.txt                    -> raw mbox
-/$LISTNAME/m/$MESSAGE_ID                        -> 301 to .html version
-/$LISTNAME/f/$MESSAGE_ID.html                   -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID                        -> 301 to .html version
-/$LISTNAME/f/$MESSAGE_ID.txt                    -> 301 to ../m/$MESSAGE_ID.txt
+/$LISTNAME/m/$MESSAGE_ID/                       -> HTML content (short quotes)
+/$LISTNAME/m/$MESSAGE_ID                        -> 301 to above
+/$LISTNAME/m/$MESSAGE_ID/raw                    -> raw mbox
+/$LISTNAME/f/$MESSAGE_ID/                       -> HTML content (full quotes)
+/$LISTNAME/f/$MESSAGE_ID                        -> 301 to above
+/$LISTNAME/f/$MESSAGE_ID/raw (*)                -> 301 to ../m/$MESSAGE_ID/raw
+
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffies)
+/$LISTNAME/m/$MESSAGE_ID.html                   -> 301 to $MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt                    -> 301 to $MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html                   -> 301 to $MESSAGE_ID/
+/$LISTNAME/f/$MESSAGE_ID.txt (*)                -> 301 to ../m/$MESSAGE_ID/raw
+
 
 FIXME: we must refactor/cleanup/add tests for most of our CGI before
 adding more endpoints and features.
 
-Maybe TODO (these might be expensive)
--------------------------------------
-/$LISTNAME/t/$MESSAGE_ID.mbox                   -> mbox content of thread
-
-We use file name suffixes on all of these (except /) so URLs may easily
-cached/memoized using a static file server.
+(*) These URLs were never linked, but only exist as a convenience to folks
+    who edit existing URLs
 
 Encoding notes
 --------------
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 3540e9a..1fef984 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -9,7 +9,7 @@ use Date::Parse qw(strptime);
 use PublicInbox::Hval;
 use PublicInbox::GitCatFile;
 use PublicInbox::View;
-use PublicInbox::MID qw/mid_clean mid_compress/;
+use PublicInbox::MID qw/mid_clean mid_compress mid2path/;
 use POSIX qw/strftime/;
 use constant {
 	DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
@@ -25,6 +25,11 @@ sub generate {
 	sub { emit_atom($_[0], $ctx) };
 }
 
+sub generate_thread_atom {
+	my ($ctx) = @_;
+	sub { emit_atom_thread($_[0], $ctx) };
+}
+
 sub generate_html_index {
 	my ($ctx) = @_;
 	sub { emit_html_index($_[0], $ctx) };
@@ -32,15 +37,22 @@ sub generate_html_index {
 
 # private subs
 
-sub atom_header {
-	my ($feed_opts) = @_;
-	my $title = $feed_opts->{description};
+sub title_tag {
+	my ($title) = @_;
+	# try to avoid the type attribute in title:
 	$title = PublicInbox::Hval->new_oneline($title)->as_html;
 	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+	"<title$type>$title</title>";
+}
+
+sub atom_header {
+	my ($feed_opts, $title) = @_;
+
+	$title = title_tag($feed_opts->{description}) unless (defined $title);
 
 	qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
 	qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
-	qq{<title$type>$title</title>} .
+	qq{$title} .
 	qq(<link\nhref="$feed_opts->{url}"/>) .
 	qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) .
 	qq(<id>mailto:$feed_opts->{id_addr}</id>);
@@ -56,19 +68,50 @@ sub emit_atom {
 	each_recent_blob($ctx, sub {
 		my ($path, undef, $ts) = @_;
 		if (defined $x) {
-			$fh->write($x . '<updated>'.
-					strftime(DATEFMT, gmtime($ts)) .
-					'</updated>');
+			$fh->write($x . '<updated>' .
+				   strftime(DATEFMT, gmtime($ts)) .
+				   '</updated>');
 			$x = undef;
 		}
 		add_to_feed($feed_opts, $fh, $path, $git);
 	});
 	$git = undef; # destroy pipes
+	_end_feed($fh);
+}
+
+sub _no_thread {
+	my ($cb) = @_;
+	my $fh = $cb->([404, ['Content-Type' => 'text/plain']]);
+	$fh->write("No feed found for thread\n");
+	$fh->close;
+}
+
+sub _end_feed {
+	my ($fh) = @_;
 	Email::Address->purge_cache;
-	$fh->write("</feed>");
+	$fh->write('</feed>');
 	$fh->close;
 }
 
+sub emit_atom_thread {
+	my ($cb, $ctx) = @_;
+	my $res = $ctx->{srch}->get_thread($ctx->{mid});
+	return _no_thread($cb) unless $res->{total};
+	my $fh = $cb->([200, ['Content-Type' => 'application/xml']]);
+	my $feed_opts = get_feedopts($ctx);
+
+	my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
+	$html_url =~ s!/atom\z!/!;
+	$feed_opts->{url} = $html_url;
+	$feed_opts->{emit_header} = 1;
+
+	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+	foreach my $msg (@{$res->{msgs}}) {
+		add_to_feed($feed_opts, $fh, mid2path($msg->mid), $git);
+	}
+	$git = undef; # destroy pipes
+	_end_feed($fh);
+}
 
 sub emit_html_index {
 	my ($cb, $ctx) = @_;
@@ -233,7 +276,6 @@ sub get_feedopts {
 
 	my $url_base;
 	if ($cgi) {
-		my $path_info = $cgi->path_info;
 		my $base;
 		if (ref($cgi) eq 'CGI') {
 			$base = $cgi->url(-base);
@@ -241,13 +283,11 @@ sub get_feedopts {
 			$base = $cgi->base->as_string;
 			$base =~ s!/\z!!;
 		}
-		$url_base = $path_info;
-		if ($url_base =~ s!/(?:|index\.html)?\z!!) {
-			$rv{atomurl} = "$base$url_base/atom.xml";
+		$url_base = "$base/$listname";
+		if (my $mid = $ctx->{mid}) { # per-thread feed:
+			$rv{atomurl} = "$url_base/t/$mid/atom";
 		} else {
-			$url_base =~ s!/atom\.xml\z!!;
-			$rv{atomurl} = $base . $path_info;
-			$url_base = $base . $url_base; # XXX is this needed?
+			$rv{atomurl} = "$url_base/atom.xml";
 		}
 	} else {
 		$url_base = "http://example.com";
@@ -288,9 +328,12 @@ sub add_to_feed {
 	defined($content) or return 0;
 	$mime = undef;
 
+	my $date = $header_obj->header('Date');
+	$date = PublicInbox::Hval->new_oneline($date);
+	$date = feed_date($date->raw) or return 0;
+
 	my $title = mime_header($header_obj, 'Subject') or return 0;
-	$title = PublicInbox::Hval->new_oneline($title)->as_html;
-	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+	$title = title_tag($title);
 
 	my $from = mime_header($header_obj, 'From') or return 0;
 	my @from = Email::Address->parse($from) or return 0;
@@ -298,13 +341,12 @@ sub add_to_feed {
 	my $email = $from[0]->address;
 	$email = PublicInbox::Hval->new_oneline($email)->as_html;
 
-	my $date = $header_obj->header('Date');
-	$date = PublicInbox::Hval->new_oneline($date);
-	$date = feed_date($date->raw) or return 0;
-
+	if (delete $feed_opts->{emit_header}) {
+		$fh->write(atom_header($feed_opts, $title) .
+			   "<updated>$date</updated>");
+	}
 	$fh->write("<entry><author><name>$name</name><email>$email</email>" .
-		   "</author><title$type>$title</title>" .
-		   "<updated>$date</updated>" .
+		   "</author>$title$date" .
 		   qq{<content\ntype="xhtml">} .
 		   qq{<div\nxmlns="http://www.w3.org/1999/xhtml">});
 	$fh->write($content);
@@ -313,7 +355,7 @@ sub add_to_feed {
 	my $h = '[a-f0-9]';
 	my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
 	my $id = 'urn:uuid:' . join('-', @uuid5);
-	my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
+	my $midurl = $feed_opts->{midurl};
 	$fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
 		   "<id>$id</id></entry>");
 	1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 477c4b6..a30bf70 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -160,7 +160,8 @@ sub emit_thread_html {
 	my $next = "<a\nid=\"s$final_anchor\">";
 	$next .= $final_anchor == 1 ? 'only message in' : 'end of';
 	$next .= " thread</a>, back to <a\nhref=\"../../\">index</a>\n";
-	$next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>\n\n";
+	$next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>";
+	$next .= " / <a\nhref=\"atom\">Atom feed</a>\n\n";
 	$fh->write("<hr />" . PRE_WRAP . $next . $foot .
 		   "</pre></body></html>");
 	$fh->close;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e6eec3d..c99c25f 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -56,6 +56,9 @@ sub run {
 		invalid_list_mid(\%ctx, $1, $2) ||
 			get_thread_mbox(\%ctx, $sfx);
 
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/atom\z!o) {
+		invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
+
 	# legacy redirects
 	} elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
 		my $pfx = $2;
@@ -348,4 +351,14 @@ sub get_thread_mbox {
 	PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
 }
 
+
+# /$LISTNAME/t/$MESSAGE_ID/atom		  -> thread as Atom feed
+sub get_thread_atom {
+	my ($ctx) = @_;
+	searcher($ctx) or return need_search($ctx);
+	$ctx->{self_url} = self_url($ctx->{cgi});
+	require PublicInbox::Feed;
+	PublicInbox::Feed::generate_thread_atom($ctx);
+}
+
 1;
diff --git a/t/cgi.t b/t/cgi.t
index fc28ae3..d84e634 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -200,6 +200,18 @@ EOF
 	} else {
 		like($res->{head}, qr/^Status: 501 /, "search not available");
 	}
+
+	my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed;
+	if ($have_xml_feed) {
+		$path = "/test/t/blahblah%40example.com/atom";
+		$res = cgi_run($path);
+		like($res->{head}, qr/^Status: 200 /, "atom returned 200");
+		like($res->{head}, qr!^Content-Type: application/xml!m,
+			"search returned atom");
+		my $p = XML::Feed->parse(\($res->{body}));
+		is($p->format, "Atom", "parsed atom feed");
+		is(scalar $p->entries, 3, "parsed three entries");
+	}
 }
 
 # redirect list-name-only URLs
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 07/11] www: compile mbox regexp only once
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (4 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml" Eric Wong
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

The 'x' modifier is unneeded since the regexp does not span
multiple lines; use 'o' so it is compiled only once.
---
 lib/PublicInbox/WWW.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index c99c25f..278d786 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -51,7 +51,7 @@ sub run {
 	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
 
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) {
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
 		my $sfx = $3;
 		invalid_list_mid(\%ctx, $1, $2) ||
 			get_thread_mbox(\%ctx, $sfx);
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml"
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (5 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 07/11] www: compile mbox regexp only once Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 09/11] completely revamp URL structure to shorten permalinks Eric Wong
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

The MIME type entry for Atom feeds relies on the "atom" file
extension, so allow properly-configured static file servers to serve
it with the correct Content-Type header.
---
 Documentation/design_www.txt | 36 ++++++++++++++++++++----------------
 lib/PublicInbox/Feed.pm      |  4 ++--
 lib/PublicInbox/WWW.pm       |  4 ++--
 t/plack.t                    |  2 +-
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index d25afca..a11c389 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -3,34 +3,38 @@ URL naming
 
 ### Unstable endpoints
 /$LISTNAME/?r=$GIT_COMMIT                       -> HTML only
-/$LISTNAME/atom.xml                             -> Atom feed
+/$LISTNAME/new.atom                        -> Atom feed
 
 #### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID/                       -> HTML content of thread
-/$LISTNAME/t/$MESSAGE_ID/atom                   -> Atom feed for thread
-/$LISTNAME/t/$MESSAGE_ID/mbox.gz                -> gzipped mbox of thread
+/$LISTNAME/t/$MESSAGE_ID/                  -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/atom              -> Atom feed for thread
+/$LISTNAME/t/$MESSAGE_ID/mbox.gz           -> gzipped mbox of thread
 
 ### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID/                       -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID                        -> 301 to above
-/$LISTNAME/m/$MESSAGE_ID/raw                    -> raw mbox
-/$LISTNAME/f/$MESSAGE_ID/                       -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID                        -> 301 to above
-/$LISTNAME/f/$MESSAGE_ID/raw (*)                -> 301 to ../m/$MESSAGE_ID/raw
+/$LISTNAME/m/$MESSAGE_ID/                  -> HTML content (short quotes)
+/$LISTNAME/m/$MESSAGE_ID                   -> 301 to above
+/$LISTNAME/m/$MESSAGE_ID/raw               -> raw mbox
+/$LISTNAME/f/$MESSAGE_ID/                  -> HTML content (full quotes)
+/$LISTNAME/f/$MESSAGE_ID                   -> 301 to above
+/$LISTNAME/f/$MESSAGE_ID/raw [1]           -> 301 to ../m/$MESSAGE_ID/raw
 
-### Legacy endpoints (may be ambiguous given Message-IDs with similar suffies)
-/$LISTNAME/m/$MESSAGE_ID.html                   -> 301 to $MESSAGE_ID/
-/$LISTNAME/m/$MESSAGE_ID.txt                    -> 301 to $MESSAGE_ID/raw
-/$LISTNAME/f/$MESSAGE_ID.html                   -> 301 to $MESSAGE_ID/
-/$LISTNAME/f/$MESSAGE_ID.txt (*)                -> 301 to ../m/$MESSAGE_ID/raw
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
+/$LISTNAME/m/$MESSAGE_ID.html              -> 301 to $MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt               -> 301 to $MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html              -> 301 to $MESSAGE_ID/
+/$LISTNAME/f/$MESSAGE_ID.txt [1]           -> 301 to ../m/$MESSAGE_ID/raw
 
+/$LISTNAME/atom.xml [2]                    -> identical to /$LISTNAME/new.atom
 
 FIXME: we must refactor/cleanup/add tests for most of our CGI before
 adding more endpoints and features.
 
-(*) These URLs were never linked, but only exist as a convenience to folks
+[1] These URLs were never linked, but only exist as a convenience to folks
     who edit existing URLs
 
+[2] Do not make this into a 301 since feed readers may not follow them as well
+    as normal browsers do.
+
 Encoding notes
 --------------
 
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 1fef984..9d58193 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -287,11 +287,11 @@ sub get_feedopts {
 		if (my $mid = $ctx->{mid}) { # per-thread feed:
 			$rv{atomurl} = "$url_base/t/$mid/atom";
 		} else {
-			$rv{atomurl} = "$url_base/atom.xml";
+			$rv{atomurl} = "$url_base/new.atom";
 		}
 	} else {
 		$url_base = "http://example.com";
-		$rv{atomurl} = "$url_base/atom.xml";
+		$rv{atomurl} = "$url_base/new.atom";
 	}
 	$rv{url} ||= "$url_base/";
 	$rv{midurl} = "$url_base/m/";
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 278d786..a9cb6d7 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -34,7 +34,7 @@ sub run {
 		invalid_list(\%ctx, $1) || redirect_list_index($cgi);
 	} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
 		invalid_list(\%ctx, $1) || get_index(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
+	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
 		invalid_list(\%ctx, $1) || get_atom(\%ctx);
 
 	# single-message pages
@@ -128,7 +128,7 @@ sub invalid_list_mid {
 	$ret;
 }
 
-# /$LISTNAME/atom.xml                       -> Atom feed, includes replies
+# /$LISTNAME/new.atom                     -> Atom feed, includes replies
 sub get_atom {
 	my ($ctx) = @_;
 	require PublicInbox::Feed;
diff --git a/t/plack.t b/t/plack.t
index b3c8764..50c9e60 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -83,7 +83,7 @@ EOF
 
 	test_psgi($app, sub {
 		my ($cb) = @_;
-		my $atomurl = 'http://example.com/test/atom.xml';
+		my $atomurl = 'http://example.com/test/new.atom';
 		my $res = $cb->(GET('http://example.com/test/'));
 		is(200, $res->code, 'success response received');
 		like($res->content, qr!href="\Q$atomurl\E"!,
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 09/11] completely revamp URL structure to shorten permalinks
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (6 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml" Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 10/11] view: drop extra '</a>' tag Eric Wong
  2015-09-01  8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

This allows common /m/ links to be used without a prefix,
saving 2 precious bytes for permalinks and raw messages.

Old URLs continue to redirect.
---
 Documentation/design_www.txt |  37 +++++----
 lib/PublicInbox/Feed.pm      |  19 +++--
 lib/PublicInbox/View.pm      |  48 ++++++------
 lib/PublicInbox/WWW.pm       | 177 +++++++++++++++++++++++--------------------
 t/cgi.t                      |  20 ++---
 t/feed.t                     |   2 +-
 t/plack.t                    |  32 +++++---
 t/view.t                     |   6 +-
 8 files changed, 179 insertions(+), 162 deletions(-)

diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index a11c389..b73a798 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -2,29 +2,28 @@ URL naming
 ----------
 
 ### Unstable endpoints
-/$LISTNAME/?r=$GIT_COMMIT                       -> HTML only
-/$LISTNAME/new.atom                        -> Atom feed
+/$LISTNAME/?r=$GIT_COMMIT                 -> HTML only
+/$LISTNAME/new.atom                       -> Atom feed
 
 #### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID/                  -> HTML content of thread
-/$LISTNAME/t/$MESSAGE_ID/atom              -> Atom feed for thread
-/$LISTNAME/t/$MESSAGE_ID/mbox.gz           -> gzipped mbox of thread
+/$LISTNAME/$MESSAGE_ID/t/                 -> HTML content of thread
+/$LISTNAME/$MESSAGE_ID/t.atom             -> Atom feed for thread
+/$LISTNAME/$MESSAGE_ID/t.mbox.gz          -> gzipped mbox of thread
 
 ### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID/                  -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID                   -> 301 to above
-/$LISTNAME/m/$MESSAGE_ID/raw               -> raw mbox
-/$LISTNAME/f/$MESSAGE_ID/                  -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID                   -> 301 to above
-/$LISTNAME/f/$MESSAGE_ID/raw [1]           -> 301 to ../m/$MESSAGE_ID/raw
-
-### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
-/$LISTNAME/m/$MESSAGE_ID.html              -> 301 to $MESSAGE_ID/
-/$LISTNAME/m/$MESSAGE_ID.txt               -> 301 to $MESSAGE_ID/raw
-/$LISTNAME/f/$MESSAGE_ID.html              -> 301 to $MESSAGE_ID/
-/$LISTNAME/f/$MESSAGE_ID.txt [1]           -> 301 to ../m/$MESSAGE_ID/raw
-
-/$LISTNAME/atom.xml [2]                    -> identical to /$LISTNAME/new.atom
+/$LISTNAME/$MESSAGE_ID/                   -> HTML content (short quotes)
+/$LISTNAME/$MESSAGE_ID                    -> 301 to /$LISTNAME/$MESSAGE_ID/
+/$LISTNAME/$MESSAGE_ID/raw                -> raw mbox
+/$LISTNAME/$MESSAGE_ID/f/                 -> HTML content (full quotes)
+
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
+/$LISTNAME/m/$MESSAGE_ID/                 -> 301 to /$LISTNAME/$MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.html             -> 301 to /$LISTNAME/$MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt              -> 301 to /$LISTNAME/$MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html             -> 301 to /$LISTNAME/$MESSAGE_ID/f/
+/$LISTNAME/f/$MESSAGE_ID.txt [1]          -> 301 to /$LISTNAME/$MESSAGE_ID/raw
+
+/$LISTNAME/atom.xml [2]                   -> identical to /$LISTNAME/new.atom
 
 FIXME: we must refactor/cleanup/add tests for most of our CGI before
 adding more endpoints and features.
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 9d58193..4420fde 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -101,7 +101,7 @@ sub emit_atom_thread {
 	my $feed_opts = get_feedopts($ctx);
 
 	my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
-	$html_url =~ s!/atom\z!/!;
+	$html_url =~ s!/t\.atom\z!/!;
 	$feed_opts->{url} = $html_url;
 	$feed_opts->{emit_header} = 1;
 
@@ -285,7 +285,7 @@ sub get_feedopts {
 		}
 		$url_base = "$base/$listname";
 		if (my $mid = $ctx->{mid}) { # per-thread feed:
-			$rv{atomurl} = "$url_base/t/$mid/atom";
+			$rv{atomurl} = "$url_base/$mid/t.atom";
 		} else {
 			$rv{atomurl} = "$url_base/new.atom";
 		}
@@ -294,8 +294,7 @@ sub get_feedopts {
 		$rv{atomurl} = "$url_base/new.atom";
 	}
 	$rv{url} ||= "$url_base/";
-	$rv{midurl} = "$url_base/m/";
-	$rv{fullurl} = "$url_base/f/";
+	$rv{midurl} = "$url_base/";
 
 	\%rv;
 }
@@ -317,14 +316,15 @@ sub add_to_feed {
 	my ($feed_opts, $fh, $add, $git) = @_;
 
 	my $mime = do_cat_mail($git, $add) or return 0;
-	my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
+	my $url = $feed_opts->{url};
+	my $midurl = $feed_opts->{midurl};
 
 	my $header_obj = $mime->header_obj;
 	my $mid = $header_obj->header('Message-ID');
 	defined $mid or return 0;
 	$mid = PublicInbox::Hval->new_msgid($mid);
-	my $href = $mid->as_href . '/';
-	my $content = PublicInbox::View->feed_entry($mime, $fullurl . $href);
+	my $href = $mid->as_href;
+	my $content = PublicInbox::View->feed_entry($mime, "$midurl$href/f/");
 	defined($content) or return 0;
 	$mime = undef;
 
@@ -355,8 +355,7 @@ sub add_to_feed {
 	my $h = '[a-f0-9]';
 	my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
 	my $id = 'urn:uuid:' . join('-', @uuid5);
-	my $midurl = $feed_opts->{midurl};
-	$fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
+	$fh->write(qq!</div></content><link\nhref="$midurl$href/"/>!.
 		   "<id>$id</id></entry>");
 	1;
 }
@@ -414,7 +413,7 @@ sub dump_topics {
 		$mid = PublicInbox::Hval->new($mid)->as_href;
 		$subj = PublicInbox::Hval->new($subj)->as_html;
 		$u = PublicInbox::Hval->new($u)->as_html;
-		$dst .= "\n<a\nhref=\"t/$mid/#u\"><b>$subj</b></a>\n- ";
+		$dst .= "\n<a\nhref=\"$mid/t/#u\"><b>$subj</b></a>\n- ";
 		$ts = strftime('%Y-%m-%d %H:%M', gmtime($ts));
 		if ($n == 1) {
 			$dst .= "created by $u @ $ts UTC\n"
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a30bf70..2be16b4 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -80,7 +80,7 @@ sub index_entry {
 		$anchor = $seen->{$anchor_idx};
 	}
 	if ($srch) {
-		$subj = "<a\nhref=\"${path}t/$href/#u\">$subj</a>";
+		$subj = "<a\nhref=\"${path}$href/t/#u\">$subj</a>";
 	}
 	if ($root_anchor && $root_anchor eq $id) {
 		$subj = "<u\nid=\"u\">$subj</u>";
@@ -101,9 +101,9 @@ sub index_entry {
 	$fh->write($rv .= "\n\n");
 
 	my ($fhref, $more_ref);
-	my $mhref = "${path}m/$href/";
+	my $mhref = "${path}$href/";
 	if ($level > 0) {
-		$fhref = "${path}f/$href/";
+		$fhref = "${path}$href/f/";
 		$more_ref = \$more;
 	}
 	# scan through all parts, looking for displayable text
@@ -112,7 +112,7 @@ sub index_entry {
 	});
 	$mime->body_set('');
 
-	my $txt = "${path}m/$href/raw";
+	my $txt = "${path}$href/raw";
 	$rv = "\n<a\nhref=\"$mhref\">$more</a> <a\nhref=\"$txt\">raw</a> ";
 	$rv .= html_footer($mime, 0, undef, $ctx);
 
@@ -120,7 +120,7 @@ sub index_entry {
 		unless (defined $anchor) {
 			my $v = PublicInbox::Hval->new_msgid($irt);
 			$v = $v->as_href;
-			$anchor = "${path}m/$v/";
+			$anchor = "${path}$v/";
 			$seen->{$anchor_idx} = $anchor;
 		}
 		$rv .= " <a\nhref=\"$anchor\">parent</a>";
@@ -160,8 +160,8 @@ sub emit_thread_html {
 	my $next = "<a\nid=\"s$final_anchor\">";
 	$next .= $final_anchor == 1 ? 'only message in' : 'end of';
 	$next .= " thread</a>, back to <a\nhref=\"../../\">index</a>\n";
-	$next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>";
-	$next .= " / <a\nhref=\"atom\">Atom feed</a>\n\n";
+	$next .= "download: <a\nhref=\"../t.mbox.gz\">mbox.gz</a>";
+	$next .= " / <a\nhref=\"../t.atom\">Atom feed</a>\n\n";
 	$fh->write("<hr />" . PRE_WRAP . $next . $foot .
 		   "</pre></body></html>");
 	$fh->close;
@@ -349,8 +349,8 @@ sub headers_to_html_header {
 		} elsif ($h eq 'Subject') {
 			$title[0] = $v->as_html;
 			if ($srch) {
-				$rv .= "$h: <a\nid=\"t\"\n" .
-					"href=\"../../t/$mid_href/\">";
+				my $p = $full_pfx ? '' : '../';
+				$rv .= "$h: <a\nid=\"t\"\nhref=\"${p}t/#u\">";
 				$rv .= $v->as_html . "</a>\n";
 				next;
 			}
@@ -359,7 +359,7 @@ sub headers_to_html_header {
 
 	}
 	$rv .= 'Message-ID: &lt;' . $mid->as_html . '&gt; ';
-	my $raw_ref = $full_pfx ? 'raw' : "../../m/$mid_href/raw";
+	my $raw_ref = $full_pfx ? 'raw' : '../raw';
 	$rv .= "(<a\nhref=\"$raw_ref\">raw</a>)\n";
 	if ($srch) {
 		$rv .= "<a\nhref=\"#r\">References: [see below]</a>\n";
@@ -373,7 +373,7 @@ sub headers_to_html_header {
 }
 
 sub thread_inline {
-	my ($dst, $ctx, $cur) = @_;
+	my ($dst, $ctx, $cur, $full_pfx) = @_;
 	my $srch = $ctx->{srch};
 	my $mid = mid_compress(mid_clean($cur->header('Message-ID')));
 	my $res = $srch->get_thread($mid);
@@ -383,9 +383,10 @@ sub thread_inline {
 		$$dst .= "\n[no followups, yet]</a>\n";
 		return;
 	}
+	my $upfx = $full_pfx ? '' : '../';
 
 	$$dst .= "\n\n~$nr messages in thread: ".
-		 "(<a\nhref=\"../../t/$mid/#u\">expand</a>)\n";
+		 "(<a\nhref=\"${upfx}t/#u\">expand</a>)\n";
 	my $subj = $srch->subject_path($cur->header('Subject'));
 	my $state = {
 		seen => { $subj => 1 },
@@ -393,7 +394,7 @@ sub thread_inline {
 		cur => $mid,
 	};
 	for (thread_results(load_results($res))->rootset) {
-		inline_dump($dst, $state, $_, 0);
+		inline_dump($dst, $state, $upfx, $_, 0);
 	}
 	$state->{next_msg};
 }
@@ -461,19 +462,20 @@ sub html_footer {
 	my $href = "mailto:$to?In-Reply-To=$irt&Cc=${cc}&Subject=$subj";
 
 	my $srch = $ctx->{srch} if $ctx;
-	my $idx = $standalone ? " <a\nhref=\"../../\">index</a>" : '';
+	my $upfx = $full_pfx ? '../' : '../../';
+	my $idx = $standalone ? "<a\nhref=\"$upfx\">index</a>" : '';
 	if ($idx && $srch) {
-		my $next = thread_inline(\$idx, $ctx, $mime);
+		my $next = thread_inline(\$idx, $ctx, $mime, $full_pfx);
 		$irt = $mime->header('In-Reply-To');
 		if (defined $irt) {
 			$irt = PublicInbox::Hval->new_msgid($irt);
 			$irt = $irt->as_href;
-			$irt = "<a\nhref=\"../$irt/\">parent</a> ";
+			$irt = "<a\nhref=\"$upfx$irt/\">parent</a> ";
 		} else {
 			$irt = ' ' x length('parent ');
 		}
 		if ($next) {
-			$irt .= "<a\nhref=\"../$next/\">next</a> ";
+			$irt .= "<a\nhref=\"$upfx$next/\">next</a> ";
 		} else {
 			$irt .= '     ';
 		}
@@ -564,7 +566,7 @@ sub _msg_date {
 }
 
 sub _inline_header {
-	my ($dst, $state, $mime, $level) = @_;
+	my ($dst, $state, $upfx, $mime, $level) = @_;
 	my $pfx = '  ' x $level;
 
 	my $cur = $state->{cur};
@@ -601,7 +603,7 @@ sub _inline_header {
 		$s = $s->as_html;
 	}
 	my $m = PublicInbox::Hval->new_msgid($mid);
-	$m = '../' . $m->as_href . '/';
+	$m = $upfx . '../' . $m->as_href . '/';
 	if (defined $s) {
 		$$dst .= "$pfx` <a\nhref=\"$m\">$s</a>\n" .
 		         "$pfx  $f @ $d\n";
@@ -611,14 +613,14 @@ sub _inline_header {
 }
 
 sub inline_dump {
-	my ($dst, $state, $node, $level) = @_;
+	my ($dst, $state, $upfx, $node, $level) = @_;
 	return unless $node;
 	return if $state->{stopped};
 	if (my $mime = $node->message) {
-		_inline_header($dst, $state, $mime, $level);
+		_inline_header($dst, $state, $upfx, $mime, $level);
 	}
-	inline_dump($dst, $state, $node->child, $level+1);
-	inline_dump($dst, $state, $node->next, $level);
+	inline_dump($dst, $state, $upfx, $node->child, $level+1);
+	inline_dump($dst, $state, $upfx, $node->next, $level);
 }
 
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index a9cb6d7..d666a1b 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -16,6 +16,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape);
 use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
+our $MID_RE = qr!([^/]+)!;
 our $pi_config;
 
 sub run {
@@ -31,56 +32,37 @@ sub run {
 	if ($path_info eq '/') {
 		r404();
 	} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
-		invalid_list(\%ctx, $1) || redirect_list_index($cgi);
+		invalid_list(\%ctx, $1) || r301(\%ctx, $1);
 	} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
 		invalid_list(\%ctx, $1) || get_index(\%ctx);
 	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
 		invalid_list(\%ctx, $1) || get_atom(\%ctx);
 
+	# thread display
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) {
+		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) {
+		my $sfx = $3;
+		invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx);
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) {
+		invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
+
 	# single-message pages
-	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) {
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
 
 	# full-message page
-	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
 
-	# thread display
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
-		my $sfx = $3;
-		invalid_list_mid(\%ctx, $1, $2) ||
-			get_thread_mbox(\%ctx, $sfx);
-
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/atom\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
-
-	# legacy redirects
-	} elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
-		my $pfx = $2;
-		invalid_list_mid(\%ctx, $1, $3) ||
-			redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/');
-	} elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) {
-		my $pfx = $2;
-		invalid_list_mid(\%ctx, $1, $3) ||
-			redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw');
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
-		my $end = $3;
-		invalid_list_mid(\%ctx, $1, $2) ||
-			redirect_mid(\%ctx, 't', $end, '/mbox.gz');
-
-	# convenience redirects, order matters
-	} elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) {
-		my $pfx = $2;
-		invalid_list_mid(\%ctx, $1, $3) ||
-			redirect_mid(\%ctx, $pfx, qr/\z/, '/');
+	# convenience redirects, order matters
+	} elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
+		r301(\%ctx, $1, $2);
 
 	} else {
-		r404();
+		legacy_redirects(\%ctx, $path_info);
 	}
 }
 
@@ -163,7 +145,7 @@ sub mid2blob {
 	}
 }
 
-# /$LISTNAME/m/$MESSAGE_ID.txt                    -> raw mbox
+# /$LISTNAME/$MESSAGE_ID/raw                    -> raw mbox
 sub get_mid_txt {
 	my ($ctx) = @_;
 	my $x = mid2blob($ctx) or return r404();
@@ -171,22 +153,21 @@ sub get_mid_txt {
 	PublicInbox::Mbox::emit1($x);
 }
 
-# /$LISTNAME/m/$MESSAGE_ID.html                   -> HTML content (short quotes)
+# /$LISTNAME/$MESSAGE_ID/                   -> HTML content (short quotes)
 sub get_mid_html {
 	my ($ctx) = @_;
 	my $x = mid2blob($ctx) or return r404();
 
 	require PublicInbox::View;
-	my $pfx = msg_pfx($ctx);
 	my $foot = footer($ctx);
 	require Email::MIME;
 	my $mime = Email::MIME->new($x);
 	searcher($ctx);
 	[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
-	  [ PublicInbox::View::msg_html($ctx, $mime, $pfx, $foot) ] ];
+	  [ PublicInbox::View::msg_html($ctx, $mime, 'f/', $foot) ] ];
 }
 
-# /$LISTNAME/f/$MESSAGE_ID.html                   -> HTML content (fullquotes)
+# /$LISTNAME/$MESSAGE_ID/f/                   -> HTML content (fullquotes)
 sub get_full_html {
 	my ($ctx) = @_;
 	my $x = mid2blob($ctx) or return r404();
@@ -200,7 +181,7 @@ sub get_full_html {
 	  [ PublicInbox::View::msg_html($ctx, $mime, undef, $foot)] ];
 }
 
-# /$LISTNAME/t/$MESSAGE_ID.html
+# /$LISTNAME/$MESSAGE_ID/t/
 sub get_thread {
 	my ($ctx) = @_;
 	my $srch = searcher($ctx) or return need_search($ctx);
@@ -214,39 +195,6 @@ sub self_url {
 	ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
 }
 
-sub redirect_list_index {
-	my ($cgi) = @_;
-	do_redirect(self_url($cgi) . "/");
-}
-
-sub redirect_mid {
-	my ($ctx, $pfx, $old, $sfx) = @_;
-	my $url = self_url($ctx->{cgi});
-	my $anchor = '';
-	if (lc($pfx) eq 't' && $sfx eq '/') {
-		$anchor = '#u'; # <u id='#u'> is used to highlight in View.pm
-	}
-	$url =~ s/$old/$sfx/;
-	do_redirect($url . $anchor);
-}
-
-# only hit when somebody tries to guess URLs manually:
-sub redirect_mid_txt {
-	my ($ctx, $pfx) = @_;
-	my $listname = $ctx->{listname};
-	my $url = self_url($ctx->{cgi});
-	$url =~ s!/$listname/f/(\S+\.txt)\z!/$listname/m/$1!;
-	do_redirect($url);
-}
-
-sub do_redirect {
-	my ($url) = @_;
-	[ 301,
-	  [ Location => $url, 'Content-Type' => 'text/plain' ],
-	  [ "Redirecting to $url\n" ]
-	]
-}
-
 sub ctx_get {
 	my ($ctx, $key) = @_;
 	my $val = $ctx->{$key};
@@ -333,14 +281,8 @@ EOF
 	[ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
 }
 
-sub msg_pfx {
-	my ($ctx) = @_;
-	my $href = PublicInbox::Hval::ascii_html(uri_escape_utf8($ctx->{mid}));
-	"../../f/$href/";
-}
-
-# /$LISTNAME/t/$MESSAGE_ID/mbox           -> thread as mbox
-# /$LISTNAME/t/$MESSAGE_ID/mbox.gz        -> thread as gzipped mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox           -> thread as mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox.gz        -> thread as gzipped mbox
 # note: I'm not a big fan of other compression formats since they're
 # significantly more expensive on CPU than gzip and less-widely available,
 # especially on older systems.  Stick to zlib since that's what git uses.
@@ -352,7 +294,7 @@ sub get_thread_mbox {
 }
 
 
-# /$LISTNAME/t/$MESSAGE_ID/atom		  -> thread as Atom feed
+# /$LISTNAME/$MESSAGE_ID/t.atom		  -> thread as Atom feed
 sub get_thread_atom {
 	my ($ctx) = @_;
 	searcher($ctx) or return need_search($ctx);
@@ -361,4 +303,71 @@ sub get_thread_atom {
 	PublicInbox::Feed::generate_thread_atom($ctx);
 }
 
+sub legacy_redirects {
+	my ($ctx, $path_info) = @_;
+
+	# single-message pages
+	if ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) {
+		r301($ctx, $1, $2);
+	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
+		r301($ctx, $1, $2, 'raw');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
+		r301($ctx, $1, $2, 'f/');
+
+	# thread display
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
+		r301($ctx, $1, $2, 't/#u');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
+		r301($ctx, $1, $2, "t.mbox$3");
+
+	# even older legacy redirects
+	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
+		r301($ctx, $1, $2);
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
+		r301($ctx, $1, $2, 't/#u');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
+		r301($ctx, $1, $2, 'f/');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\.txt\z!o) {
+		r301($ctx, $1, $2, 'raw');
+
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
+		r301($ctx, $1, $2, "t$3");
+
+	# legacy convenience redirects, order still matters
+	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\z!o) {
+		r301($ctx, $1, $2);
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\z!o) {
+		r301($ctx, $1, $2, 't/#u');
+	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\z!o) {
+		r301($ctx, $1, $2, 'f/');
+
+	} else {
+		r404();
+	}
+}
+
+sub r301 {
+	my ($ctx, $listname, $mid, $suffix) = @_;
+	my $cgi = $ctx->{cgi};
+	my $url;
+	if (ref($cgi) eq 'CGI') {
+		$url = $cgi->url(-base) . '/';
+	} else {
+		$url = $cgi->base->as_string;
+	}
+
+	$url .= $listname . '/';
+	$url .= (uri_escape_utf8($mid) . '/') if (defined $mid);
+	$url .= $suffix if (defined $suffix);
+
+	[ 301,
+	  [ Location => $url, 'Content-Type' => 'text/plain' ],
+	  [ "Redirecting to $url\n" ] ]
+}
+
 1;
diff --git a/t/cgi.t b/t/cgi.t
index d84e634..a6600c2 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -109,7 +109,7 @@ EOF
 	like($res->{body}, qr/<title>test for public-inbox/,
 		"set title in XML feed");
 	like($res->{body},
-		qr!http://test\.example\.com/test/m/blah%40example\.com!,
+		qr!http://test\.example\.com/test/blah%40example\.com/!,
 		"link id set");
 	like($res->{body}, qr/what\?/, "reply included");
 }
@@ -152,26 +152,26 @@ EOF
 	}
 	local $ENV{GIT_DIR} = $maindir;
 
-	my $res = cgi_run("/test/m/slashy%2fasdf%40example.com/raw");
+	my $res = cgi_run("/test/slashy%2fasdf%40example.com/raw");
 	like($res->{body}, qr/Message-Id: <\Q$slashy_mid\E>/,
 		"slashy mid raw hit");
 
-	$res = cgi_run("/test/m/blahblah\@example.com/raw");
+	$res = cgi_run("/test/blahblah\@example.com/raw");
 	like($res->{body}, qr/Message-Id: <blahblah\@example\.com>/,
 		"mid raw hit");
-	$res = cgi_run("/test/m/blahblah\@example.con/raw");
+	$res = cgi_run("/test/blahblah\@example.con/raw");
 	like($res->{head}, qr/Status: 404 Not Found/, "mid raw miss");
 
-	$res = cgi_run("/test/m/blahblah\@example.com/");
+	$res = cgi_run("/test/blahblah\@example.com/");
 	like($res->{body}, qr/\A<html>/, "mid html hit");
 	like($res->{head}, qr/Status: 200 OK/, "200 response");
-	$res = cgi_run("/test/m/blahblah\@example.con/");
+	$res = cgi_run("/test/blahblah\@example.con/");
 	like($res->{head}, qr/Status: 404 Not Found/, "mid html miss");
 
-	$res = cgi_run("/test/f/blahblah\@example.com/");
+	$res = cgi_run("/test/blahblah\@example.com/f/");
 	like($res->{body}, qr/\A<html>/, "mid html");
 	like($res->{head}, qr/Status: 200 OK/, "200 response");
-	$res = cgi_run("/test/f/blahblah\@example.con/");
+	$res = cgi_run("/test/blahblah\@example.con/f/");
 	like($res->{head}, qr/Status: 404 Not Found/, "mid html miss");
 
 	$res = cgi_run("/test/");
@@ -183,7 +183,7 @@ EOF
 {
 	local $ENV{HOME} = $home;
 	local $ENV{PATH} = $main_path;
-	my $path = "/test/t/blahblah%40example.com/mbox.gz";
+	my $path = "/test/blahblah%40example.com/t.mbox.gz";
 	my $res = cgi_run($path);
 	like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled");
 	my $indexed = system($index, $maindir) == 0;
@@ -203,7 +203,7 @@ EOF
 
 	my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed;
 	if ($have_xml_feed) {
-		$path = "/test/t/blahblah%40example.com/atom";
+		$path = "/test/blahblah%40example.com/t.atom";
 		$res = cgi_run($path);
 		like($res->{head}, qr/^Status: 200 /, "atom returned 200");
 		like($res->{head}, qr!^Content-Type: application/xml!m,
diff --git a/t/feed.t b/t/feed.t
index a9955f0..e4ec752 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -77,7 +77,7 @@ EOF
 		}
 
 		unlike($feed, qr/drop me/, "long quoted text dropped");
-		like($feed, qr!/f/\d%40example\.com/#q!,
+		like($feed, qr!/\d%40example\.com/f/#q!,
 			"/f/ url generated for long quoted text");
 		like($feed, qr/inline me here/, "short quoted text kept");
 		like($feed, qr/keep me/, "unquoted text saved");
diff --git a/t/plack.t b/t/plack.t
index 50c9e60..067a593 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -88,7 +88,7 @@ EOF
 		is(200, $res->code, 'success response received');
 		like($res->content, qr!href="\Q$atomurl\E"!,
 			'atom URL generated');
-		like($res->content, qr!href="m/blah%40example\.com/"!,
+		like($res->content, qr!href="blah%40example\.com/"!,
 			'index generated');
 	});
 
@@ -98,14 +98,14 @@ EOF
 		my $res = $cb->(GET($pfx . '/atom.xml'));
 		is(200, $res->code, 'success response received for atom');
 		like($res->content,
-			qr!link\s+href="\Q$pfx\E/m/blah%40example\.com/"!s,
+			qr!link\s+href="\Q$pfx\E/blah%40example\.com/"!s,
 			'atom feed generated correct URL');
 	});
 
-	foreach my $t (qw(f m)) {
+	foreach my $t (('', 'f/')) {
 		test_psgi($app, sub {
 			my ($cb) = @_;
-			my $path = "/$t/blah%40example.com/";
+			my $path = "/blah%40example.com/$t";
 			my $res = $cb->(GET($pfx . $path));
 			is(200, $res->code, "success for $path");
 			like($res->content, qr!<title>hihi - Me</title>!,
@@ -114,8 +114,8 @@ EOF
 	}
 	test_psgi($app, sub {
 		my ($cb) = @_;
-		my $res = $cb->(GET($pfx . '/m/blah%40example.com/raw'));
-		is(200, $res->code, 'success response received for /m/*/raw');
+		my $res = $cb->(GET($pfx . '/blah%40example.com/raw'));
+		is(200, $res->code, 'success response received for /*/raw');
 		like($res->content, qr!\AFrom !, "mbox returned");
 	});
 
@@ -126,18 +126,25 @@ EOF
 			my $res = $cb->(GET($pfx . "/$t/blah%40example.com.txt"));
 			is(301, $res->code, "redirect for old $t .txt link");
 			my $location = $res->header('Location');
-			like($location, qr!/$t/blah%40example\.com/raw\z!,
+			like($location, qr!/blah%40example\.com/raw\z!,
 				".txt redirected to /raw");
 		});
 	}
-	foreach my $t (qw(m f t)) {
+
+	my %umap = (
+		'm' => '',
+		'f' => 'f/',
+		't' => 't/',
+	);
+	while (my ($t, $e) = each %umap) {
 		test_psgi($app, sub {
 			my ($cb) = @_;
 			my $res = $cb->(GET($pfx . "/$t/blah%40example.com.html"));
 			is(301, $res->code, "redirect for old $t .html link");
 			my $location = $res->header('Location');
-			like($location, qr!/$t/blah%40example\.com/(?:#u)?\z!,
-				".html redirected to /raw");
+			like($location,
+				qr!/blah%40example\.com/$e(?:#u)?\z!,
+				".html redirected to new location");
 		});
 	}
 	foreach my $sfx (qw(mbox mbox.gz)) {
@@ -146,8 +153,9 @@ EOF
 			my $res = $cb->(GET($pfx . "/t/blah%40example.com.$sfx"));
 			is(301, $res->code, 'redirect for old thread link');
 			my $location = $res->header('Location');
-			like($location, qr!/t/blah%40example\.com/mbox\.gz\z!,
-				"$sfx redirected to /mbox.gz");
+			like($location,
+			     qr!/blah%40example\.com/t\.mbox(?:\.gz)?\z!,
+			     "$sfx redirected to /mbox.gz");
 		});
 	}
 }
diff --git a/t/view.t b/t/view.t
index 77cf3a3..83823d8 100644
--- a/t/view.t
+++ b/t/view.t
@@ -44,17 +44,17 @@ EOF
 	my $html = PublicInbox::View::msg_html(undef, $mime);
 
 	# ghetto tests
-	like($html, qr!<a\nhref="\.\./\.\./m/hello%40!s, "MID link present");
+	like($html, qr!<a\nhref="\.\./raw"!s, "raw link present");
 	like($html, qr/hello world\b/, "body present");
 	like($html, qr/&gt; keep this inline/, "short quoted text is inline");
 	like($html, qr/<a\nid=[^>]+><\/a>&gt; Long and wordy/,
 		"long quoted text is anchored");
 
 	# short page
-	my $pfx = "../../f/hello%40example.com/";
+	my $pfx = "../hello%40example.com/f/";
 	$mime = Email::MIME->new($s);
 	my $short = PublicInbox::View::msg_html(undef, $mime, $pfx);
-	like($short, qr!<a\nhref="\.\./\.\./f/hello%40example\.com/!s,
+	like($short, qr!<a\nhref="\.\./hello%40example\.com/f/!s,
 		"MID link present");
 	like($short, qr/\n&gt; keep this inline/,
 		"short quoted text is inline");
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 10/11] view: drop extra '</a>' tag
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (7 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 09/11] completely revamp URL structure to shorten permalinks Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
  9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

Oops.
---
 lib/PublicInbox/View.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 2be16b4..45f559e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -380,7 +380,7 @@ sub thread_inline {
 	my $nr = $res->{total};
 
 	if ($nr <= 1) {
-		$$dst .= "\n[no followups, yet]</a>\n";
+		$$dst .= "\n[no followups, yet]\n";
 		return;
 	}
 	my $upfx = $full_pfx ? '' : '../';
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 11/11] view: more robust link generation
  2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
                   ` (8 preceding siblings ...)
  2015-09-01  8:55 ` [PATCH 10/11] view: drop extra '</a>' tag Eric Wong
@ 2015-09-01  8:55 ` Eric Wong
  2015-09-01  9:08   ` [PATCH 12/11] view: add missing space Eric Wong
  9 siblings, 1 reply; 13+ messages in thread
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

We must avoid double-escaping in cases where we have URLs anchored
by "<>" in the plain-text, as is the common (and AFAIK recommended)
convention.  So we must use a two-step linkification process
to prevent double-escaping.
---
 lib/PublicInbox/View.pm | 62 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 45f559e..3d7ba6f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -10,7 +10,8 @@ use Encode::MIME::Header;
 use Email::MIME::ContentType qw/parse_content_type/;
 use PublicInbox::Hval;
 use PublicInbox::MID qw/mid_clean mid_compress mid2path/;
-use Digest::SHA;
+use Digest::SHA qw/sha1_hex/;
+my $SALT = rand;
 require POSIX;
 
 # TODO: make these constants tunable
@@ -235,10 +236,35 @@ my $LINK_RE = qr!\b((?:ftp|https?|nntp)://
 		 [\@:\w\.-]+/
 		 ?[\@\w\+\&\?\.\%\;/#=-]*)!x;
 
-sub linkify {
-	# no newlines added here since it'd break the splitting we do
-	# to fold quotes
-	$_[0] =~ s!$LINK_RE!<a\nhref="$1">$1</a>!g;
+sub linkify_1 {
+	my ($link_map, $s) = @_;
+	$s =~ s!$LINK_RE!
+		my $url = $1;
+		# salt this, as this could be exploited to show
+		# links in the HTML which don't show up in the raw mail.
+		my $key = sha1_hex($url . $SALT);
+		$link_map->{$key} = $url;
+		'PI-LINK-'. $key;
+	!ge;
+	$s;
+}
+
+sub linkify_2 {
+	my ($link_map, $s) = @_;
+
+	# Added "PI-LINK-" prefix to avoid false-positives on git commits
+	$s =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+		my $key = $1;
+		my $url = $link_map->{$key};
+		if (defined $url) {
+			$url = ascii_html($url);
+			"<a\nhref=\"$url\">$url</a>";
+		} else {
+			# false positive or somebody tried to mess with us
+			$key;
+		}
+	!ge;
+	$s;
 }
 
 sub flush_quote {
@@ -247,13 +273,15 @@ sub flush_quote {
 	if ($full_pfx) {
 		if (!$final && scalar(@$quot) <= MAX_INLINE_QUOTED) {
 			# show quote inline
-			my $rv = join('', map { linkify($_); $_ } @$quot);
+			my %l;
+			my $rv = join('', map { linkify_1(\%l, $_) } @$quot);
 			@$quot = ();
-			return $rv;
+			$rv = ascii_html($rv);
+			return linkify_2(\%l, $rv);
 		}
 
 		# show a short snippet of quoted text and link to full version:
-		@$quot = map { s/^(?:&gt;\s*)+//gm; $_ } @$quot;
+		@$quot = map { s/^(?:>\s*)+//gm; $_ } @$quot;
 		my $cur = join(' ', @$quot);
 		@$quot = split(/\s+/, $cur);
 		$cur = '';
@@ -268,16 +296,19 @@ sub flush_quote {
 		} while (@$quot && length($cur) < MAX_TRUNC_LEN);
 		@$quot = ();
 		$cur =~ s/ \z/ .../s;
+		$cur = ascii_html($cur);
 		my $nr = ++$$n;
 		"&gt; [<a\nhref=\"$full_pfx#q${part_nr}_$nr\">$cur</a>]\n";
 	} else {
 		# show everything in the full version with anchor from
 		# short version (see above)
 		my $nr = ++$$n;
-		my $rv = "<a\nid=q${part_nr}_$nr></a>";
-		$rv .= join('', map { linkify($_); $_ } @$quot);
+		my $rv = "";
+		my %l;
+		$rv .= join('', map { linkify_1(\%l, $_) } @$quot);
 		@$quot = ();
-		$rv;
+		$rv = ascii_html($rv);
+		"<a\nid=q${part_nr}_$nr></a>" . linkify_2(\%l, $rv);
 	}
 }
 
@@ -297,7 +328,6 @@ sub add_text_body {
 	my $s = $part->body;
 	$part->body_set('');
 	$s = $enc->decode($s);
-	$s = ascii_html($s);
 	my @lines = split(/^/m, $s);
 	$s = '';
 
@@ -309,7 +339,7 @@ sub add_text_body {
 
 	my @quot;
 	while (defined(my $cur = shift @lines)) {
-		if ($cur !~ /^&gt;/) {
+		if ($cur !~ /^>/) {
 			# show the previously buffered quote inline
 			if (scalar @quot) {
 				$s .= flush_quote(\@quot, \$n, $$part_nr,
@@ -317,8 +347,10 @@ sub add_text_body {
 			}
 
 			# regular line, OK
-			linkify($cur);
-			$s .= $cur;
+			my %l;
+			$cur = linkify_1(\%l, $cur);
+			$cur = ascii_html($cur);
+			$s .= linkify_2(\%l, $cur);
 		} else {
 			push @quot, $cur;
 		}
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 12/11] view: add missing space
  2015-09-01  8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
@ 2015-09-01  9:08   ` Eric Wong
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  9:08 UTC (permalink / raw)
  To: meta

This fixes a regression introduced in
commit 1b4b2c7b8b2f2df8f114617d2e875eaf5c839ce0
("completely revamp URL structure to shorten permalinks")
---
 lib/PublicInbox/View.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 3d7ba6f..29888f9 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -495,7 +495,7 @@ sub html_footer {
 
 	my $srch = $ctx->{srch} if $ctx;
 	my $upfx = $full_pfx ? '../' : '../../';
-	my $idx = $standalone ? "<a\nhref=\"$upfx\">index</a>" : '';
+	my $idx = $standalone ? " <a\nhref=\"$upfx\">index</a>" : '';
 	if ($idx && $srch) {
 		my $next = thread_inline(\$idx, $ctx, $mime, $full_pfx);
 		$irt = $mime->header('In-Reply-To');
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [13/11 PATCH] feed: fix <updated> tag in Atom feed
  2015-09-01  8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
@ 2015-09-01  9:30   ` Eric Wong
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01  9:30 UTC (permalink / raw)
  To: meta

Fixes commit d44ed46ee92c78aaaed64975c4d6846613963be4
("implement per-thread Atom feeds")
---
 lib/PublicInbox/Feed.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 4420fde..9615880 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -331,6 +331,7 @@ sub add_to_feed {
 	my $date = $header_obj->header('Date');
 	$date = PublicInbox::Hval->new_oneline($date);
 	$date = feed_date($date->raw) or return 0;
+	$date = "<updated>$date</updated>";
 
 	my $title = mime_header($header_obj, 'Subject') or return 0;
 	$title = title_tag($title);
@@ -342,8 +343,7 @@ sub add_to_feed {
 	$email = PublicInbox::Hval->new_oneline($email)->as_html;
 
 	if (delete $feed_opts->{emit_header}) {
-		$fh->write(atom_header($feed_opts, $title) .
-			   "<updated>$date</updated>");
+		$fh->write(atom_header($feed_opts, $title) . $date);
 	}
 	$fh->write("<entry><author><name>$name</name><email>$email</email>" .
 		   "</author>$title$date" .
-- 
EW


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-09-01  9:30 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-01  8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
2015-09-01  8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
2015-09-01  8:55 ` [PATCH 03/11] search: show newest results first Eric Wong
2015-09-01  8:55 ` [PATCH 04/11] feed: use updated date based on git commit date Eric Wong
2015-09-01  8:55 ` [PATCH 05/11] feed: extract atom header generation Eric Wong
2015-09-01  8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
2015-09-01  9:30   ` [13/11 PATCH] feed: fix <updated> tag in Atom feed Eric Wong
2015-09-01  8:55 ` [PATCH 07/11] www: compile mbox regexp only once Eric Wong
2015-09-01  8:55 ` [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml" Eric Wong
2015-09-01  8:55 ` [PATCH 09/11] completely revamp URL structure to shorten permalinks Eric Wong
2015-09-01  8:55 ` [PATCH 10/11] view: drop extra '</a>' tag Eric Wong
2015-09-01  8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
2015-09-01  9:08   ` [PATCH 12/11] view: add missing space Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).