* [PATCH 01/11] search: reduce redundant doc data
@ 2015-09-01 8:55 Eric Wong
2015-09-01 8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
` (9 more replies)
0 siblings, 10 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
Redundant document data increases our database size.  Instead, pull
smsg->mid off the unique term and smsg->ts off the value, and
generate the formatted display date only from smsg->ts.
---
lib/PublicInbox/Search.pm | 7 ++++---
lib/PublicInbox/SearchIdx.pm | 2 --
lib/PublicInbox/SearchMsg.pm | 42 ++++++++++++++++++++++--------------------
lib/PublicInbox/View.pm | 15 ++++++++-------
4 files changed, 34 insertions(+), 32 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index d3faaeb..b7b215f 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -4,8 +4,9 @@
package PublicInbox::Search;
use strict;
use warnings;
-use PublicInbox::SearchMsg;
+use constant TS => 0;
use Search::Xapian qw/:standard/;
+use PublicInbox::SearchMsg;
use Email::MIME;
use PublicInbox::MID qw/mid_clean mid_compress/;
@@ -15,7 +16,6 @@ our $REPLY_RE = qr/^re:\s+/i;
our $LANG = 'english';
use constant {
- TS => 0,
# SCHEMA_VERSION history
# 0 - initial
# 1 - subject_path is lower-cased
@@ -25,7 +25,8 @@ use constant {
# 5 - subject_path drops trailing '.'
# 6 - preserve References: order in document data
# 7 - remove references and inreplyto terms
- SCHEMA_VERSION => 7,
+ # 8 - remove redundant/unneeded document data
+ SCHEMA_VERSION => 8,
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index dec3333..32e0714 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -81,8 +81,6 @@ sub add_message {
$doc->add_term(xpfx('path') . mid_compress($path));
}
- my $from = $smsg->from_name;
- my $date = $smsg->date;
my $ts = Search::Xapian::sortable_serialise($smsg->ts);
$doc->add_value(PublicInbox::Search::TS, $ts);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 4ad8a0c..1821b07 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -13,6 +13,7 @@ use PublicInbox::MID qw/mid_clean mid_compress/;
use Encode qw/find_encoding/;
my $enc_utf8 = find_encoding('UTF-8');
our $PFX2TERM_RE = undef;
+use constant EPOCH_822 => 'Thu, 01 Jan 1970 00:00:00 +0000';
sub new {
my ($class, $mime) = @_;
@@ -30,13 +31,17 @@ sub wrap {
sub load_doc {
my ($class, $doc) = @_;
my $data = $doc->get_data;
+ my $ts = eval {
+ no strict 'subs';
+ $doc->get_value(PublicInbox::Search::TS);
+ };
+ $ts = Search::Xapian::sortable_unserialise($ts);
$data = $enc_utf8->decode($data);
- my ($mid, $subj, $from, $date, $refs) = split(/\n/, $data);
+ my ($subj, $from, $refs) = split(/\n/, $data);
bless {
doc => $doc,
- mid => $mid,
subject => $subj,
- date => $date,
+ ts => $ts,
from_name => $from,
references_sorted => $refs,
}, $class;
@@ -77,27 +82,13 @@ sub from_name {
sub ts {
my ($self) = @_;
- my $ts = $self->{ts};
- return $ts if $ts;
- $self->{ts} = eval {
- str2time($self->date || $self->mime->header('Date'))
- } || 0;
-}
-
-sub date {
- my ($self) = @_;
- my $date = $self->{date};
- return $date if $date;
- my $ts = eval { str2time($self->mime->header('Date')) };
- $self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+ $self->{ts} ||= eval { str2time($self->mime->header('Date')) } || 0;
}
sub to_doc_data {
my ($self) = @_;
- $self->mid . "\n" .
PublicInbox::Search::subject_summary($self->subject) . "\n" .
$self->from_name . "\n".
- $self->date . "\n" .
$self->references_sorted;
}
@@ -139,14 +130,23 @@ sub mini_mime {
my @h = (
Subject => $self->subject,
'X-PI-From' => $self->from_name,
- 'X-PI-Date' => $self->date,
'X-PI-TS' => $self->ts,
'Message-ID' => "<$self->{mid}>",
+
+ # prevent Email::Simple::Creator from running,
+ # this header is useless for threading as we use X-PI-TS
+ # for sorting and display:
+ 'Date' => EPOCH_822,
);
my $refs = $self->{references_sorted};
my $mime = Email::MIME->create(header_str => \@h);
- $mime->header_set('References', $refs) if (defined $refs);
+ my $h = $mime->header_obj;
+ $h->header_set('References', $refs) if (defined $refs);
+
+ # drop useless headers Email::MIME set for us
+ $h->header_set('Date');
+ $h->header_set('MIME-Version');
$mime;
}
@@ -155,6 +155,8 @@ sub mid {
if (defined $mid) {
$self->{mid} = $mid;
+ } elsif (my $rv = $self->{mid}) {
+ $rv;
} else {
$self->ensure_metadata; # needed for ghosts
$self->{mid} ||= $self->_extract_mid;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 584a2d7..477c4b6 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -86,12 +86,7 @@ sub index_entry {
$subj = "<u\nid=\"u\">$subj</u>";
}
- my $ts = $mime->header('X-PI-TS');
- unless (defined $ts) {
- $ts = msg_timestamp($mime);
- }
- $ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
-
+ my $ts = _msg_date($mime);
my $rv = "<table\nsummary=l$level><tr>";
if ($level) {
$rv .= '<td><pre>' . (' ' x $level) . '</pre></td>';
@@ -561,6 +556,12 @@ sub missing_thread {
EOF
}
+sub _msg_date {
+ my ($mime) = @_;
+ my $ts = $mime->header('X-PI-TS') || msg_timestamp($mime);
+ POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+}
+
sub _inline_header {
my ($dst, $state, $mime, $level) = @_;
my $pfx = ' ' x $level;
@@ -568,7 +569,7 @@ sub _inline_header {
my $cur = $state->{cur};
my $mid = $mime->header('Message-ID');
my $f = $mime->header('X-PI-From');
- my $d = $mime->header('X-PI-Date');
+ my $d = _msg_date($mime);
$f = PublicInbox::Hval->new($f);
$d = PublicInbox::Hval->new($d);
$f = $f->as_html;
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 02/11] search: allow querying all mail with ''
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 03/11] search: show newest results first Eric Wong
` (8 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
This makes dumping recent topics easier, hopefully.
---
lib/PublicInbox/Search.pm | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index b7b215f..831c4fd 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -74,10 +74,14 @@ sub reopen { $_[0]->{xdb}->reopen }
# read-only
sub query {
my ($self, $query_string, $opts) = @_;
- my $query = $self->qp->parse_query($query_string, QP_FLAGS);
+ my $query;
$opts ||= {};
- $opts->{relevance} = 1;
+ unless ($query_string eq '') {
+ $query = $self->qp->parse_query($query_string, QP_FLAGS);
+ $opts->{relevance} = 1;
+ }
+
$self->do_enquire($query, $opts);
}
@@ -104,8 +108,11 @@ sub get_thread {
sub do_enquire {
my ($self, $query, $opts) = @_;
my $enquire = $self->enquire;
-
- $query = Search::Xapian::Query->new(OP_AND, $query, $mail_query);
+ if (defined $query) {
+ $query = Search::Xapian::Query->new(OP_AND,$query,$mail_query);
+ } else {
+ $query = $mail_query;
+ }
$enquire->set_query($query);
if ($opts->{relevance}) {
$enquire->set_sort_by_relevance_then_value(TS, 0);
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 03/11] search: show newest results first
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
2015-09-01 8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 04/11] feed: use updated date based on git commit date Eric Wong
` (7 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
Like revision control history, older stuff is less relevant,
so favor newer stuff first.
---
lib/PublicInbox/Search.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 831c4fd..8b32ef3 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -115,9 +115,9 @@ sub do_enquire {
}
$enquire->set_query($query);
if ($opts->{relevance}) {
- $enquire->set_sort_by_relevance_then_value(TS, 0);
+ $enquire->set_sort_by_relevance_then_value(TS, 1);
} else {
- $enquire->set_sort_by_value(TS, 0);
+ $enquire->set_sort_by_value(TS, 1);
}
$opts ||= {};
my $offset = $opts->{offset} || 0;
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 04/11] feed: use updated date based on git commit date
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
2015-09-01 8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
2015-09-01 8:55 ` [PATCH 03/11] search: show newest results first Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 05/11] feed: extract atom header generation Eric Wong
` (6 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
This will hopefully make life easier for feed readers.
---
lib/PublicInbox/Feed.pm | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index bc76cdc..71042d7 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -10,6 +10,7 @@ use PublicInbox::Hval;
use PublicInbox::GitCatFile;
use PublicInbox::View;
use PublicInbox::MID qw/mid_clean mid_compress/;
+use POSIX qw/strftime/;
use constant {
DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
MAX_PER_PAGE => 25, # this needs to be tunable
@@ -33,7 +34,6 @@ sub generate_html_index {
sub emit_atom {
my ($cb, $ctx) = @_;
- require POSIX;
my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]);
my $max = $ctx->{max} || MAX_PER_PAGE;
my $feed_opts = get_feedopts($ctx);
@@ -45,18 +45,23 @@ sub emit_atom {
my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
my $url = $feed_opts->{url} || "http://example.com/";
my $atomurl = $feed_opts->{atomurl};
- $fh->write(qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+ my $x = qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
qq{<title$type>$title</title>} .
qq{<link\nhref="$url"/>} .
qq{<link\nrel="self"\nhref="$atomurl"/>} .
- qq{<id>mailto:$addr</id>} .
- '<updated>' . POSIX::strftime(DATEFMT, gmtime) . '</updated>');
+ qq{<id>mailto:$addr</id>};
my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
each_recent_blob($ctx, sub {
- my ($add, undef) = @_;
- add_to_feed($feed_opts, $fh, $add, $git);
+ my ($path, undef, $ts) = @_;
+ if (defined $x) {
+ $fh->write($x . '<updated>'.
+ strftime(DATEFMT, gmtime($ts)) .
+ '</updated>');
+ $x = undef;
+ }
+ add_to_feed($feed_opts, $fh, $path, $git);
});
$git = undef; # destroy pipes
Email::Address->purge_cache;
@@ -259,7 +264,7 @@ sub feed_date {
my ($date) = @_;
my @t = eval { strptime($date) };
- scalar(@t) ? POSIX::strftime(DATEFMT, @t) : 0;
+ scalar(@t) ? strftime(DATEFMT, @t) : 0;
}
# returns 0 (skipped) or 1 (added)
@@ -363,7 +368,7 @@ sub dump_topics {
$subj = PublicInbox::Hval->new($subj)->as_html;
$u = PublicInbox::Hval->new($u)->as_html;
$dst .= "\n<a\nhref=\"t/$mid/#u\"><b>$subj</b></a>\n- ";
- $ts = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+ $ts = strftime('%Y-%m-%d %H:%M', gmtime($ts));
if ($n == 1) {
$dst .= "created by $u @ $ts UTC\n"
} else {
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 05/11] feed: extract atom header generation
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (2 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 04/11] feed: use updated date based on git commit date Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
` (5 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
We'll be using it for per-thread subscriptions.
---
lib/PublicInbox/Feed.pm | 41 +++++++++++++++++++++++------------------
1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 71042d7..3540e9a 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -32,26 +32,26 @@ sub generate_html_index {
# private subs
+sub atom_header {
+ my ($feed_opts) = @_;
+ my $title = $feed_opts->{description};
+ $title = PublicInbox::Hval->new_oneline($title)->as_html;
+ my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+
+ qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+ qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
+ qq{<title$type>$title</title>} .
+ qq(<link\nhref="$feed_opts->{url}"/>) .
+ qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) .
+ qq(<id>mailto:$feed_opts->{id_addr}</id>);
+}
+
sub emit_atom {
my ($cb, $ctx) = @_;
my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]);
my $max = $ctx->{max} || MAX_PER_PAGE;
my $feed_opts = get_feedopts($ctx);
- my $addr = $feed_opts->{address};
- $addr = $addr->[0] if ref($addr);
- $addr ||= 'public-inbox@example.com';
- my $title = $feed_opts->{description} || "unnamed feed";
- $title = PublicInbox::Hval->new_oneline($title)->as_html;
- my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
- my $url = $feed_opts->{url} || "http://example.com/";
- my $atomurl = $feed_opts->{atomurl};
- my $x = qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
- qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
- qq{<title$type>$title</title>} .
- qq{<link\nhref="$url"/>} .
- qq{<link\nrel="self"\nhref="$atomurl"/>} .
- qq{<id>mailto:$addr</id>};
-
+ my $x = atom_header($feed_opts);
my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
each_recent_blob($ctx, sub {
my ($path, undef, $ts) = @_;
@@ -219,13 +219,18 @@ sub get_feedopts {
if (open my $fh, '<', "$ctx->{git_dir}/description") {
chomp($rv{description} = <$fh>);
close $fh;
+ } else {
+ $rv{description} = '($GIT_DIR/description missing)';
}
if ($pi_config && defined $listname && $listname ne '') {
- foreach my $key (qw(address)) {
- $rv{$key} = $pi_config->get($listname, $key) || "";
- }
+ my $addr = $pi_config->get($listname, 'address') || "";
+ $rv{address} = $addr;
+ $addr = $addr->[0] if ref($addr);
+ $rv{id_addr} = $addr;
}
+ $rv{id_addr} ||= 'public-inbox@example.com';
+
my $url_base;
if ($cgi) {
my $path_info = $cgi->path_info;
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 06/11] implement per-thread Atom feeds
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (3 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 05/11] feed: extract atom header generation Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 9:30 ` [13/11 PATCH] feed: fix <updated> tag in Atom feed Eric Wong
2015-09-01 8:55 ` [PATCH 07/11] www: compile mbox regexp only once Eric Wong
` (4 subsequent siblings)
9 siblings, 1 reply; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
This allows users to subscribe to only a single thread
with their feed reader without subscribing to the rest of
the list.
Update our endpoint notes while we're at it.
---
Documentation/design_www.txt | 31 ++++++++-------
lib/PublicInbox/Feed.pm | 92 ++++++++++++++++++++++++++++++++------------
lib/PublicInbox/View.pm | 3 +-
lib/PublicInbox/WWW.pm | 13 +++++++
t/cgi.t | 12 ++++++
5 files changed, 112 insertions(+), 39 deletions(-)
diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index 55e9268..d25afca 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -6,25 +6,30 @@ URL naming
/$LISTNAME/atom.xml -> Atom feed
#### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID.html -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/ -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/atom -> Atom feed for thread
+/$LISTNAME/t/$MESSAGE_ID/mbox.gz -> gzipped mbox of thread
### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID.txt -> raw mbox
-/$LISTNAME/m/$MESSAGE_ID -> 301 to .html version
-/$LISTNAME/f/$MESSAGE_ID.html -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID -> 301 to .html version
-/$LISTNAME/f/$MESSAGE_ID.txt -> 301 to ../m/$MESSAGE_ID.txt
+/$LISTNAME/m/$MESSAGE_ID/ -> HTML content (short quotes)
+/$LISTNAME/m/$MESSAGE_ID -> 301 to above
+/$LISTNAME/m/$MESSAGE_ID/raw -> raw mbox
+/$LISTNAME/f/$MESSAGE_ID/ -> HTML content (full quotes)
+/$LISTNAME/f/$MESSAGE_ID -> 301 to above
+/$LISTNAME/f/$MESSAGE_ID/raw (*) -> 301 to ../m/$MESSAGE_ID/raw
+
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffies)
+/$LISTNAME/m/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt -> 301 to $MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
+/$LISTNAME/f/$MESSAGE_ID.txt (*) -> 301 to ../m/$MESSAGE_ID/raw
+
FIXME: we must refactor/cleanup/add tests for most of our CGI before
adding more endpoints and features.
-Maybe TODO (these might be expensive)
--------------------------------------
-/$LISTNAME/t/$MESSAGE_ID.mbox -> mbox content of thread
-
-We use file name suffixes on all of these (except /) so URLs may easily
-cached/memoized using a static file server.
+(*) These URLs were never linked, but only exist as a convenience to folks
+ who edit existing URLs
Encoding notes
--------------
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 3540e9a..1fef984 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -9,7 +9,7 @@ use Date::Parse qw(strptime);
use PublicInbox::Hval;
use PublicInbox::GitCatFile;
use PublicInbox::View;
-use PublicInbox::MID qw/mid_clean mid_compress/;
+use PublicInbox::MID qw/mid_clean mid_compress mid2path/;
use POSIX qw/strftime/;
use constant {
DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
@@ -25,6 +25,11 @@ sub generate {
sub { emit_atom($_[0], $ctx) };
}
+sub generate_thread_atom {
+ my ($ctx) = @_;
+ sub { emit_atom_thread($_[0], $ctx) };
+}
+
sub generate_html_index {
my ($ctx) = @_;
sub { emit_html_index($_[0], $ctx) };
@@ -32,15 +37,22 @@ sub generate_html_index {
# private subs
-sub atom_header {
- my ($feed_opts) = @_;
- my $title = $feed_opts->{description};
+sub title_tag {
+ my ($title) = @_;
+ # try to avoid the type attribute in title:
$title = PublicInbox::Hval->new_oneline($title)->as_html;
my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+ "<title$type>$title</title>";
+}
+
+sub atom_header {
+ my ($feed_opts, $title) = @_;
+
+ $title = title_tag($feed_opts->{description}) unless (defined $title);
qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
- qq{<title$type>$title</title>} .
+ qq{$title} .
qq(<link\nhref="$feed_opts->{url}"/>) .
qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) .
qq(<id>mailto:$feed_opts->{id_addr}</id>);
@@ -56,19 +68,50 @@ sub emit_atom {
each_recent_blob($ctx, sub {
my ($path, undef, $ts) = @_;
if (defined $x) {
- $fh->write($x . '<updated>'.
- strftime(DATEFMT, gmtime($ts)) .
- '</updated>');
+ $fh->write($x . '<updated>' .
+ strftime(DATEFMT, gmtime($ts)) .
+ '</updated>');
$x = undef;
}
add_to_feed($feed_opts, $fh, $path, $git);
});
$git = undef; # destroy pipes
+ _end_feed($fh);
+}
+
+sub _no_thread {
+ my ($cb) = @_;
+ my $fh = $cb->([404, ['Content-Type' => 'text/plain']]);
+ $fh->write("No feed found for thread\n");
+ $fh->close;
+}
+
+sub _end_feed {
+ my ($fh) = @_;
Email::Address->purge_cache;
- $fh->write("</feed>");
+ $fh->write('</feed>');
$fh->close;
}
+sub emit_atom_thread {
+ my ($cb, $ctx) = @_;
+ my $res = $ctx->{srch}->get_thread($ctx->{mid});
+ return _no_thread($cb) unless $res->{total};
+ my $fh = $cb->([200, ['Content-Type' => 'application/xml']]);
+ my $feed_opts = get_feedopts($ctx);
+
+ my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
+ $html_url =~ s!/atom\z!/!;
+ $feed_opts->{url} = $html_url;
+ $feed_opts->{emit_header} = 1;
+
+ my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+ foreach my $msg (@{$res->{msgs}}) {
+ add_to_feed($feed_opts, $fh, mid2path($msg->mid), $git);
+ }
+ $git = undef; # destroy pipes
+ _end_feed($fh);
+}
sub emit_html_index {
my ($cb, $ctx) = @_;
@@ -233,7 +276,6 @@ sub get_feedopts {
my $url_base;
if ($cgi) {
- my $path_info = $cgi->path_info;
my $base;
if (ref($cgi) eq 'CGI') {
$base = $cgi->url(-base);
@@ -241,13 +283,11 @@ sub get_feedopts {
$base = $cgi->base->as_string;
$base =~ s!/\z!!;
}
- $url_base = $path_info;
- if ($url_base =~ s!/(?:|index\.html)?\z!!) {
- $rv{atomurl} = "$base$url_base/atom.xml";
+ $url_base = "$base/$listname";
+ if (my $mid = $ctx->{mid}) { # per-thread feed:
+ $rv{atomurl} = "$url_base/t/$mid/atom";
} else {
- $url_base =~ s!/atom\.xml\z!!;
- $rv{atomurl} = $base . $path_info;
- $url_base = $base . $url_base; # XXX is this needed?
+ $rv{atomurl} = "$url_base/atom.xml";
}
} else {
$url_base = "http://example.com";
@@ -288,9 +328,12 @@ sub add_to_feed {
defined($content) or return 0;
$mime = undef;
+ my $date = $header_obj->header('Date');
+ $date = PublicInbox::Hval->new_oneline($date);
+ $date = feed_date($date->raw) or return 0;
+
my $title = mime_header($header_obj, 'Subject') or return 0;
- $title = PublicInbox::Hval->new_oneline($title)->as_html;
- my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+ $title = title_tag($title);
my $from = mime_header($header_obj, 'From') or return 0;
my @from = Email::Address->parse($from) or return 0;
@@ -298,13 +341,12 @@ sub add_to_feed {
my $email = $from[0]->address;
$email = PublicInbox::Hval->new_oneline($email)->as_html;
- my $date = $header_obj->header('Date');
- $date = PublicInbox::Hval->new_oneline($date);
- $date = feed_date($date->raw) or return 0;
-
+ if (delete $feed_opts->{emit_header}) {
+ $fh->write(atom_header($feed_opts, $title) .
+ "<updated>$date</updated>");
+ }
$fh->write("<entry><author><name>$name</name><email>$email</email>" .
- "</author><title$type>$title</title>" .
- "<updated>$date</updated>" .
+ "</author>$title$date" .
qq{<content\ntype="xhtml">} .
qq{<div\nxmlns="http://www.w3.org/1999/xhtml">});
$fh->write($content);
@@ -313,7 +355,7 @@ sub add_to_feed {
my $h = '[a-f0-9]';
my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
my $id = 'urn:uuid:' . join('-', @uuid5);
- my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
+ my $midurl = $feed_opts->{midurl};
$fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
"<id>$id</id></entry>");
1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 477c4b6..a30bf70 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -160,7 +160,8 @@ sub emit_thread_html {
my $next = "<a\nid=\"s$final_anchor\">";
$next .= $final_anchor == 1 ? 'only message in' : 'end of';
$next .= " thread</a>, back to <a\nhref=\"../../\">index</a>\n";
- $next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>\n\n";
+ $next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>";
+ $next .= " / <a\nhref=\"atom\">Atom feed</a>\n\n";
$fh->write("<hr />" . PRE_WRAP . $next . $foot .
"</pre></body></html>");
$fh->close;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e6eec3d..c99c25f 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -56,6 +56,9 @@ sub run {
invalid_list_mid(\%ctx, $1, $2) ||
get_thread_mbox(\%ctx, $sfx);
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/atom\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
+
# legacy redirects
} elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
my $pfx = $2;
@@ -348,4 +351,14 @@ sub get_thread_mbox {
PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
}
+
+# /$LISTNAME/t/$MESSAGE_ID/atom -> thread as Atom feed
+sub get_thread_atom {
+ my ($ctx) = @_;
+ searcher($ctx) or return need_search($ctx);
+ $ctx->{self_url} = self_url($ctx->{cgi});
+ require PublicInbox::Feed;
+ PublicInbox::Feed::generate_thread_atom($ctx);
+}
+
1;
diff --git a/t/cgi.t b/t/cgi.t
index fc28ae3..d84e634 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -200,6 +200,18 @@ EOF
} else {
like($res->{head}, qr/^Status: 501 /, "search not available");
}
+
+ my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed;
+ if ($have_xml_feed) {
+ $path = "/test/t/blahblah%40example.com/atom";
+ $res = cgi_run($path);
+ like($res->{head}, qr/^Status: 200 /, "atom returned 200");
+ like($res->{head}, qr!^Content-Type: application/xml!m,
+ "search returned atom");
+ my $p = XML::Feed->parse(\($res->{body}));
+ is($p->format, "Atom", "parsed atom feed");
+ is(scalar $p->entries, 3, "parsed three entries");
+ }
}
# redirect list-name-only URLs
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 07/11] www: compile mbox regexp only once
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (4 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml" Eric Wong
` (3 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
The 'x' modifier is not needed since the regexp does not span multiple lines.
---
lib/PublicInbox/WWW.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index c99c25f..278d786 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -51,7 +51,7 @@ sub run {
} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
my $sfx = $3;
invalid_list_mid(\%ctx, $1, $2) ||
get_thread_mbox(\%ctx, $sfx);
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml"
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (5 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 07/11] www: compile mbox regexp only once Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 09/11] completely revamp URL structure to shorten permalinks Eric Wong
` (2 subsequent siblings)
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
The MIME type entry for Atom feeds relies on the "atom" file name
suffix, so allow properly-configured static file servers to serve
it with the correct Content-Type header.
---
Documentation/design_www.txt | 36 ++++++++++++++++++++----------------
lib/PublicInbox/Feed.pm | 4 ++--
lib/PublicInbox/WWW.pm | 4 ++--
t/plack.t | 2 +-
4 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index d25afca..a11c389 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -3,34 +3,38 @@ URL naming
### Unstable endpoints
/$LISTNAME/?r=$GIT_COMMIT -> HTML only
-/$LISTNAME/atom.xml -> Atom feed
+/$LISTNAME/new.atom -> Atom feed
#### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID/ -> HTML content of thread
-/$LISTNAME/t/$MESSAGE_ID/atom -> Atom feed for thread
-/$LISTNAME/t/$MESSAGE_ID/mbox.gz -> gzipped mbox of thread
+/$LISTNAME/t/$MESSAGE_ID/ -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/atom -> Atom feed for thread
+/$LISTNAME/t/$MESSAGE_ID/mbox.gz -> gzipped mbox of thread
### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID/ -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID -> 301 to above
-/$LISTNAME/m/$MESSAGE_ID/raw -> raw mbox
-/$LISTNAME/f/$MESSAGE_ID/ -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID -> 301 to above
-/$LISTNAME/f/$MESSAGE_ID/raw (*) -> 301 to ../m/$MESSAGE_ID/raw
+/$LISTNAME/m/$MESSAGE_ID/ -> HTML content (short quotes)
+/$LISTNAME/m/$MESSAGE_ID -> 301 to above
+/$LISTNAME/m/$MESSAGE_ID/raw -> raw mbox
+/$LISTNAME/f/$MESSAGE_ID/ -> HTML content (full quotes)
+/$LISTNAME/f/$MESSAGE_ID -> 301 to above
+/$LISTNAME/f/$MESSAGE_ID/raw [1] -> 301 to ../m/$MESSAGE_ID/raw
-### Legacy endpoints (may be ambiguous given Message-IDs with similar suffies)
-/$LISTNAME/m/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
-/$LISTNAME/m/$MESSAGE_ID.txt -> 301 to $MESSAGE_ID/raw
-/$LISTNAME/f/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
-/$LISTNAME/f/$MESSAGE_ID.txt (*) -> 301 to ../m/$MESSAGE_ID/raw
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
+/$LISTNAME/m/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt -> 301 to $MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
+/$LISTNAME/f/$MESSAGE_ID.txt [1] -> 301 to ../m/$MESSAGE_ID/raw
+/$LISTNAME/atom.xml [2] -> identical to /$LISTNAME/new.atom
FIXME: we must refactor/cleanup/add tests for most of our CGI before
adding more endpoints and features.
-(*) These URLs were never linked, but only exist as a convenience to folks
+[1] These URLs were never linked, but only exist as a convenience to folks
who edit existing URLs
+[2] Do not make this into a 301 since feed readers may not follow them as well
+ as normal browsers do.
+
Encoding notes
--------------
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 1fef984..9d58193 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -287,11 +287,11 @@ sub get_feedopts {
if (my $mid = $ctx->{mid}) { # per-thread feed:
$rv{atomurl} = "$url_base/t/$mid/atom";
} else {
- $rv{atomurl} = "$url_base/atom.xml";
+ $rv{atomurl} = "$url_base/new.atom";
}
} else {
$url_base = "http://example.com";
- $rv{atomurl} = "$url_base/atom.xml";
+ $rv{atomurl} = "$url_base/new.atom";
}
$rv{url} ||= "$url_base/";
$rv{midurl} = "$url_base/m/";
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 278d786..a9cb6d7 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -34,7 +34,7 @@ sub run {
invalid_list(\%ctx, $1) || redirect_list_index($cgi);
} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
invalid_list(\%ctx, $1) || get_index(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/atom\.xml\z!o) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
invalid_list(\%ctx, $1) || get_atom(\%ctx);
# single-message pages
@@ -128,7 +128,7 @@ sub invalid_list_mid {
$ret;
}
-# /$LISTNAME/atom.xml -> Atom feed, includes replies
+# /$LISTNAME/new.atom -> Atom feed, includes replies
sub get_atom {
my ($ctx) = @_;
require PublicInbox::Feed;
diff --git a/t/plack.t b/t/plack.t
index b3c8764..50c9e60 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -83,7 +83,7 @@ EOF
test_psgi($app, sub {
my ($cb) = @_;
- my $atomurl = 'http://example.com/test/atom.xml';
+ my $atomurl = 'http://example.com/test/new.atom';
my $res = $cb->(GET('http://example.com/test/'));
is(200, $res->code, 'success response received');
like($res->content, qr!href="\Q$atomurl\E"!,
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 09/11] completely revamp URL structure to shorten permalinks
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (6 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml" Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 10/11] view: drop extra '</a>' tag Eric Wong
2015-09-01 8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
This allows common /m/ links to be used without a prefix,
saving 2 precious bytes for permalinks and raw messages.
Old URLs continue to redirect.
---
Documentation/design_www.txt | 37 +++++----
lib/PublicInbox/Feed.pm | 19 +++--
lib/PublicInbox/View.pm | 48 ++++++------
lib/PublicInbox/WWW.pm | 177 +++++++++++++++++++++++--------------------
t/cgi.t | 20 ++---
t/feed.t | 2 +-
t/plack.t | 32 +++++---
t/view.t | 6 +-
8 files changed, 179 insertions(+), 162 deletions(-)
diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index a11c389..b73a798 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -2,29 +2,28 @@ URL naming
----------
### Unstable endpoints
-/$LISTNAME/?r=$GIT_COMMIT -> HTML only
-/$LISTNAME/new.atom -> Atom feed
+/$LISTNAME/?r=$GIT_COMMIT -> HTML only
+/$LISTNAME/new.atom -> Atom feed
#### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID/ -> HTML content of thread
-/$LISTNAME/t/$MESSAGE_ID/atom -> Atom feed for thread
-/$LISTNAME/t/$MESSAGE_ID/mbox.gz -> gzipped mbox of thread
+/$LISTNAME/$MESSAGE_ID/t/ -> HTML content of thread
+/$LISTNAME/$MESSAGE_ID/t.atom -> Atom feed for thread
+/$LISTNAME/$MESSAGE_ID/t.mbox.gz -> gzipped mbox of thread
### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID/ -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID -> 301 to above
-/$LISTNAME/m/$MESSAGE_ID/raw -> raw mbox
-/$LISTNAME/f/$MESSAGE_ID/ -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID -> 301 to above
-/$LISTNAME/f/$MESSAGE_ID/raw [1] -> 301 to ../m/$MESSAGE_ID/raw
-
-### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
-/$LISTNAME/m/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
-/$LISTNAME/m/$MESSAGE_ID.txt -> 301 to $MESSAGE_ID/raw
-/$LISTNAME/f/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/
-/$LISTNAME/f/$MESSAGE_ID.txt [1] -> 301 to ../m/$MESSAGE_ID/raw
-
-/$LISTNAME/atom.xml [2] -> identical to /$LISTNAME/new.atom
+/$LISTNAME/$MESSAGE_ID/ -> HTML content (short quotes)
+/$LISTNAME/$MESSAGE_ID -> 301 to /$LISTNAME/$MESSAGE_ID/
+/$LISTNAME/$MESSAGE_ID/raw -> raw mbox
+/$LISTNAME/$MESSAGE_ID/f/ -> HTML content (full quotes)
+
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
+/$LISTNAME/m/$MESSAGE_ID/ -> 301 to /$LISTNAME/$MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.html -> 301 to /$LISTNAME/$MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt -> 301 to /$LISTNAME/$MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html -> 301 to /$LISTNAME/$MESSAGE_ID/f/
+/$LISTNAME/f/$MESSAGE_ID.txt [1] -> 301 to /$LISTNAME/$MESSAGE_ID/raw
+
+/$LISTNAME/atom.xml [2] -> identical to /$LISTNAME/new.atom
FIXME: we must refactor/cleanup/add tests for most of our CGI before
adding more endpoints and features.
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 9d58193..4420fde 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -101,7 +101,7 @@ sub emit_atom_thread {
my $feed_opts = get_feedopts($ctx);
my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
- $html_url =~ s!/atom\z!/!;
+ $html_url =~ s!/t\.atom\z!/!;
$feed_opts->{url} = $html_url;
$feed_opts->{emit_header} = 1;
@@ -285,7 +285,7 @@ sub get_feedopts {
}
$url_base = "$base/$listname";
if (my $mid = $ctx->{mid}) { # per-thread feed:
- $rv{atomurl} = "$url_base/t/$mid/atom";
+ $rv{atomurl} = "$url_base/$mid/t.atom";
} else {
$rv{atomurl} = "$url_base/new.atom";
}
@@ -294,8 +294,7 @@ sub get_feedopts {
$rv{atomurl} = "$url_base/new.atom";
}
$rv{url} ||= "$url_base/";
- $rv{midurl} = "$url_base/m/";
- $rv{fullurl} = "$url_base/f/";
+ $rv{midurl} = "$url_base/";
\%rv;
}
@@ -317,14 +316,15 @@ sub add_to_feed {
my ($feed_opts, $fh, $add, $git) = @_;
my $mime = do_cat_mail($git, $add) or return 0;
- my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
+ my $url = $feed_opts->{url};
+ my $midurl = $feed_opts->{midurl};
my $header_obj = $mime->header_obj;
my $mid = $header_obj->header('Message-ID');
defined $mid or return 0;
$mid = PublicInbox::Hval->new_msgid($mid);
- my $href = $mid->as_href . '/';
- my $content = PublicInbox::View->feed_entry($mime, $fullurl . $href);
+ my $href = $mid->as_href;
+ my $content = PublicInbox::View->feed_entry($mime, "$midurl$href/f/");
defined($content) or return 0;
$mime = undef;
@@ -355,8 +355,7 @@ sub add_to_feed {
my $h = '[a-f0-9]';
my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
my $id = 'urn:uuid:' . join('-', @uuid5);
- my $midurl = $feed_opts->{midurl};
- $fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
+ $fh->write(qq!</div></content><link\nhref="$midurl$href/"/>!.
"<id>$id</id></entry>");
1;
}
@@ -414,7 +413,7 @@ sub dump_topics {
$mid = PublicInbox::Hval->new($mid)->as_href;
$subj = PublicInbox::Hval->new($subj)->as_html;
$u = PublicInbox::Hval->new($u)->as_html;
- $dst .= "\n<a\nhref=\"t/$mid/#u\"><b>$subj</b></a>\n- ";
+ $dst .= "\n<a\nhref=\"$mid/t/#u\"><b>$subj</b></a>\n- ";
$ts = strftime('%Y-%m-%d %H:%M', gmtime($ts));
if ($n == 1) {
$dst .= "created by $u @ $ts UTC\n"
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index a30bf70..2be16b4 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -80,7 +80,7 @@ sub index_entry {
$anchor = $seen->{$anchor_idx};
}
if ($srch) {
- $subj = "<a\nhref=\"${path}t/$href/#u\">$subj</a>";
+ $subj = "<a\nhref=\"${path}$href/t/#u\">$subj</a>";
}
if ($root_anchor && $root_anchor eq $id) {
$subj = "<u\nid=\"u\">$subj</u>";
@@ -101,9 +101,9 @@ sub index_entry {
$fh->write($rv .= "\n\n");
my ($fhref, $more_ref);
- my $mhref = "${path}m/$href/";
+ my $mhref = "${path}$href/";
if ($level > 0) {
- $fhref = "${path}f/$href/";
+ $fhref = "${path}$href/f/";
$more_ref = \$more;
}
# scan through all parts, looking for displayable text
@@ -112,7 +112,7 @@ sub index_entry {
});
$mime->body_set('');
- my $txt = "${path}m/$href/raw";
+ my $txt = "${path}$href/raw";
$rv = "\n<a\nhref=\"$mhref\">$more</a> <a\nhref=\"$txt\">raw</a> ";
$rv .= html_footer($mime, 0, undef, $ctx);
@@ -120,7 +120,7 @@ sub index_entry {
unless (defined $anchor) {
my $v = PublicInbox::Hval->new_msgid($irt);
$v = $v->as_href;
- $anchor = "${path}m/$v/";
+ $anchor = "${path}$v/";
$seen->{$anchor_idx} = $anchor;
}
$rv .= " <a\nhref=\"$anchor\">parent</a>";
@@ -160,8 +160,8 @@ sub emit_thread_html {
my $next = "<a\nid=\"s$final_anchor\">";
$next .= $final_anchor == 1 ? 'only message in' : 'end of';
$next .= " thread</a>, back to <a\nhref=\"../../\">index</a>\n";
- $next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>";
- $next .= " / <a\nhref=\"atom\">Atom feed</a>\n\n";
+ $next .= "download: <a\nhref=\"../t.mbox.gz\">mbox.gz</a>";
+ $next .= " / <a\nhref=\"../t.atom\">Atom feed</a>\n\n";
$fh->write("<hr />" . PRE_WRAP . $next . $foot .
"</pre></body></html>");
$fh->close;
@@ -349,8 +349,8 @@ sub headers_to_html_header {
} elsif ($h eq 'Subject') {
$title[0] = $v->as_html;
if ($srch) {
- $rv .= "$h: <a\nid=\"t\"\n" .
- "href=\"../../t/$mid_href/\">";
+ my $p = $full_pfx ? '' : '../';
+ $rv .= "$h: <a\nid=\"t\"\nhref=\"${p}t/#u\">";
$rv .= $v->as_html . "</a>\n";
next;
}
@@ -359,7 +359,7 @@ sub headers_to_html_header {
}
$rv .= 'Message-ID: <' . $mid->as_html . '> ';
- my $raw_ref = $full_pfx ? 'raw' : "../../m/$mid_href/raw";
+ my $raw_ref = $full_pfx ? 'raw' : '../raw';
$rv .= "(<a\nhref=\"$raw_ref\">raw</a>)\n";
if ($srch) {
$rv .= "<a\nhref=\"#r\">References: [see below]</a>\n";
@@ -373,7 +373,7 @@ sub headers_to_html_header {
}
sub thread_inline {
- my ($dst, $ctx, $cur) = @_;
+ my ($dst, $ctx, $cur, $full_pfx) = @_;
my $srch = $ctx->{srch};
my $mid = mid_compress(mid_clean($cur->header('Message-ID')));
my $res = $srch->get_thread($mid);
@@ -383,9 +383,10 @@ sub thread_inline {
$$dst .= "\n[no followups, yet]</a>\n";
return;
}
+ my $upfx = $full_pfx ? '' : '../';
$$dst .= "\n\n~$nr messages in thread: ".
- "(<a\nhref=\"../../t/$mid/#u\">expand</a>)\n";
+ "(<a\nhref=\"${upfx}t/#u\">expand</a>)\n";
my $subj = $srch->subject_path($cur->header('Subject'));
my $state = {
seen => { $subj => 1 },
@@ -393,7 +394,7 @@ sub thread_inline {
cur => $mid,
};
for (thread_results(load_results($res))->rootset) {
- inline_dump($dst, $state, $_, 0);
+ inline_dump($dst, $state, $upfx, $_, 0);
}
$state->{next_msg};
}
@@ -461,19 +462,20 @@ sub html_footer {
my $href = "mailto:$to?In-Reply-To=$irt&Cc=${cc}&Subject=$subj";
my $srch = $ctx->{srch} if $ctx;
- my $idx = $standalone ? " <a\nhref=\"../../\">index</a>" : '';
+ my $upfx = $full_pfx ? '../' : '../../';
+ my $idx = $standalone ? "<a\nhref=\"$upfx\">index</a>" : '';
if ($idx && $srch) {
- my $next = thread_inline(\$idx, $ctx, $mime);
+ my $next = thread_inline(\$idx, $ctx, $mime, $full_pfx);
$irt = $mime->header('In-Reply-To');
if (defined $irt) {
$irt = PublicInbox::Hval->new_msgid($irt);
$irt = $irt->as_href;
- $irt = "<a\nhref=\"../$irt/\">parent</a> ";
+ $irt = "<a\nhref=\"$upfx$irt/\">parent</a> ";
} else {
$irt = ' ' x length('parent ');
}
if ($next) {
- $irt .= "<a\nhref=\"../$next/\">next</a> ";
+ $irt .= "<a\nhref=\"$upfx$next/\">next</a> ";
} else {
$irt .= ' ';
}
@@ -564,7 +566,7 @@ sub _msg_date {
}
sub _inline_header {
- my ($dst, $state, $mime, $level) = @_;
+ my ($dst, $state, $upfx, $mime, $level) = @_;
my $pfx = ' ' x $level;
my $cur = $state->{cur};
@@ -601,7 +603,7 @@ sub _inline_header {
$s = $s->as_html;
}
my $m = PublicInbox::Hval->new_msgid($mid);
- $m = '../' . $m->as_href . '/';
+ $m = $upfx . '../' . $m->as_href . '/';
if (defined $s) {
$$dst .= "$pfx` <a\nhref=\"$m\">$s</a>\n" .
"$pfx $f @ $d\n";
@@ -611,14 +613,14 @@ sub _inline_header {
}
sub inline_dump {
- my ($dst, $state, $node, $level) = @_;
+ my ($dst, $state, $upfx, $node, $level) = @_;
return unless $node;
return if $state->{stopped};
if (my $mime = $node->message) {
- _inline_header($dst, $state, $mime, $level);
+ _inline_header($dst, $state, $upfx, $mime, $level);
}
- inline_dump($dst, $state, $node->child, $level+1);
- inline_dump($dst, $state, $node->next, $level);
+ inline_dump($dst, $state, $upfx, $node->child, $level+1);
+ inline_dump($dst, $state, $upfx, $node->next, $level);
}
1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index a9cb6d7..d666a1b 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -16,6 +16,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape);
use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
use constant PI_URL => 'http://public-inbox.org/';
our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
+our $MID_RE = qr!([^/]+)!;
our $pi_config;
sub run {
@@ -31,56 +32,37 @@ sub run {
if ($path_info eq '/') {
r404();
} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
- invalid_list(\%ctx, $1) || redirect_list_index($cgi);
+ invalid_list(\%ctx, $1) || r301(\%ctx, $1);
} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
invalid_list(\%ctx, $1) || get_index(\%ctx);
} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
invalid_list(\%ctx, $1) || get_atom(\%ctx);
+ # thread display
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) {
+ my $sfx = $3;
+ invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx);
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) {
+ invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
+
# single-message pages
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
# full-message page
- } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
- # thread display
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
- my $sfx = $3;
- invalid_list_mid(\%ctx, $1, $2) ||
- get_thread_mbox(\%ctx, $sfx);
-
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/atom\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
-
- # legacy redirects
- } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
- my $pfx = $2;
- invalid_list_mid(\%ctx, $1, $3) ||
- redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/');
- } elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) {
- my $pfx = $2;
- invalid_list_mid(\%ctx, $1, $3) ||
- redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw');
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
- my $end = $3;
- invalid_list_mid(\%ctx, $1, $2) ||
- redirect_mid(\%ctx, 't', $end, '/mbox.gz');
-
- # convenience redirects, order matters
- } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) {
- my $pfx = $2;
- invalid_list_mid(\%ctx, $1, $3) ||
- redirect_mid(\%ctx, $pfx, qr/\z/, '/');
+ # convenience redirects, order matters
+ } elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
+ r301(\%ctx, $1, $2);
} else {
- r404();
+ legacy_redirects(\%ctx, $path_info);
}
}
@@ -163,7 +145,7 @@ sub mid2blob {
}
}
-# /$LISTNAME/m/$MESSAGE_ID.txt -> raw mbox
+# /$LISTNAME/$MESSAGE_ID/raw -> raw mbox
sub get_mid_txt {
my ($ctx) = @_;
my $x = mid2blob($ctx) or return r404();
@@ -171,22 +153,21 @@ sub get_mid_txt {
PublicInbox::Mbox::emit1($x);
}
-# /$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes)
+# /$LISTNAME/$MESSAGE_ID/ -> HTML content (short quotes)
sub get_mid_html {
my ($ctx) = @_;
my $x = mid2blob($ctx) or return r404();
require PublicInbox::View;
- my $pfx = msg_pfx($ctx);
my $foot = footer($ctx);
require Email::MIME;
my $mime = Email::MIME->new($x);
searcher($ctx);
[ 200, [ 'Content-Type' => 'text/html; charset=UTF-8' ],
- [ PublicInbox::View::msg_html($ctx, $mime, $pfx, $foot) ] ];
+ [ PublicInbox::View::msg_html($ctx, $mime, 'f/', $foot) ] ];
}
-# /$LISTNAME/f/$MESSAGE_ID.html -> HTML content (fullquotes)
+# /$LISTNAME/$MESSAGE_ID/f/ -> HTML content (fullquotes)
sub get_full_html {
my ($ctx) = @_;
my $x = mid2blob($ctx) or return r404();
@@ -200,7 +181,7 @@ sub get_full_html {
[ PublicInbox::View::msg_html($ctx, $mime, undef, $foot)] ];
}
-# /$LISTNAME/t/$MESSAGE_ID.html
+# /$LISTNAME/$MESSAGE_ID/t/
sub get_thread {
my ($ctx) = @_;
my $srch = searcher($ctx) or return need_search($ctx);
@@ -214,39 +195,6 @@ sub self_url {
ref($cgi) eq 'CGI' ? $cgi->self_url : $cgi->uri->as_string;
}
-sub redirect_list_index {
- my ($cgi) = @_;
- do_redirect(self_url($cgi) . "/");
-}
-
-sub redirect_mid {
- my ($ctx, $pfx, $old, $sfx) = @_;
- my $url = self_url($ctx->{cgi});
- my $anchor = '';
- if (lc($pfx) eq 't' && $sfx eq '/') {
- $anchor = '#u'; # <u id='#u'> is used to highlight in View.pm
- }
- $url =~ s/$old/$sfx/;
- do_redirect($url . $anchor);
-}
-
-# only hit when somebody tries to guess URLs manually:
-sub redirect_mid_txt {
- my ($ctx, $pfx) = @_;
- my $listname = $ctx->{listname};
- my $url = self_url($ctx->{cgi});
- $url =~ s!/$listname/f/(\S+\.txt)\z!/$listname/m/$1!;
- do_redirect($url);
-}
-
-sub do_redirect {
- my ($url) = @_;
- [ 301,
- [ Location => $url, 'Content-Type' => 'text/plain' ],
- [ "Redirecting to $url\n" ]
- ]
-}
-
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
@@ -333,14 +281,8 @@ EOF
[ 501, [ 'Content-Type' => 'text/html; charset=UTF-8' ], [ $msg ] ];
}
-sub msg_pfx {
- my ($ctx) = @_;
- my $href = PublicInbox::Hval::ascii_html(uri_escape_utf8($ctx->{mid}));
- "../../f/$href/";
-}
-
-# /$LISTNAME/t/$MESSAGE_ID/mbox -> thread as mbox
-# /$LISTNAME/t/$MESSAGE_ID/mbox.gz -> thread as gzipped mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox -> thread as mbox
+# /$LISTNAME/$MESSAGE_ID/t.mbox.gz -> thread as gzipped mbox
# note: I'm not a big fan of other compression formats since they're
# significantly more expensive on CPU than gzip and less-widely available,
# especially on older systems. Stick to zlib since that's what git uses.
@@ -352,7 +294,7 @@ sub get_thread_mbox {
}
-# /$LISTNAME/t/$MESSAGE_ID/atom -> thread as Atom feed
+# /$LISTNAME/$MESSAGE_ID/t.atom -> thread as Atom feed
sub get_thread_atom {
my ($ctx) = @_;
searcher($ctx) or return need_search($ctx);
@@ -361,4 +303,71 @@ sub get_thread_atom {
PublicInbox::Feed::generate_thread_atom($ctx);
}
+sub legacy_redirects {
+ my ($ctx, $path_info) = @_;
+
+ # single-message pages
+ if ($path_info =~ m!$LISTNAME_RE/m/(\S+)/\z!o) {
+ r301($ctx, $1, $2);
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
+ r301($ctx, $1, $2, 'raw');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
+ r301($ctx, $1, $2, 'f/');
+
+ # thread display
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
+ r301($ctx, $1, $2, 't/#u');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!o) {
+ r301($ctx, $1, $2, "t.mbox$3");
+
+ # even older legacy redirects
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
+ r301($ctx, $1, $2);
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
+ r301($ctx, $1, $2, 't/#u');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
+ r301($ctx, $1, $2, 'f/');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/(?:m|f)/(\S+)\.txt\z!o) {
+ r301($ctx, $1, $2, 'raw');
+
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
+ r301($ctx, $1, $2, "t$3");
+
+ # legacy convenience redirects, order still matters
+ } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\z!o) {
+ r301($ctx, $1, $2);
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\z!o) {
+ r301($ctx, $1, $2, 't/#u');
+ } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\z!o) {
+ r301($ctx, $1, $2, 'f/');
+
+ } else {
+ r404();
+ }
+}
+
+sub r301 {
+ my ($ctx, $listname, $mid, $suffix) = @_;
+ my $cgi = $ctx->{cgi};
+ my $url;
+ if (ref($cgi) eq 'CGI') {
+ $url = $cgi->url(-base) . '/';
+ } else {
+ $url = $cgi->base->as_string;
+ }
+
+ $url .= $listname . '/';
+ $url .= (uri_escape_utf8($mid) . '/') if (defined $mid);
+ $url .= $suffix if (defined $suffix);
+
+ [ 301,
+ [ Location => $url, 'Content-Type' => 'text/plain' ],
+ [ "Redirecting to $url\n" ] ]
+}
+
1;
diff --git a/t/cgi.t b/t/cgi.t
index d84e634..a6600c2 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -109,7 +109,7 @@ EOF
like($res->{body}, qr/<title>test for public-inbox/,
"set title in XML feed");
like($res->{body},
- qr!http://test\.example\.com/test/m/blah%40example\.com!,
+ qr!http://test\.example\.com/test/blah%40example\.com/!,
"link id set");
like($res->{body}, qr/what\?/, "reply included");
}
@@ -152,26 +152,26 @@ EOF
}
local $ENV{GIT_DIR} = $maindir;
- my $res = cgi_run("/test/m/slashy%2fasdf%40example.com/raw");
+ my $res = cgi_run("/test/slashy%2fasdf%40example.com/raw");
like($res->{body}, qr/Message-Id: <\Q$slashy_mid\E>/,
"slashy mid raw hit");
- $res = cgi_run("/test/m/blahblah\@example.com/raw");
+ $res = cgi_run("/test/blahblah\@example.com/raw");
like($res->{body}, qr/Message-Id: <blahblah\@example\.com>/,
"mid raw hit");
- $res = cgi_run("/test/m/blahblah\@example.con/raw");
+ $res = cgi_run("/test/blahblah\@example.con/raw");
like($res->{head}, qr/Status: 404 Not Found/, "mid raw miss");
- $res = cgi_run("/test/m/blahblah\@example.com/");
+ $res = cgi_run("/test/blahblah\@example.com/");
like($res->{body}, qr/\A<html>/, "mid html hit");
like($res->{head}, qr/Status: 200 OK/, "200 response");
- $res = cgi_run("/test/m/blahblah\@example.con/");
+ $res = cgi_run("/test/blahblah\@example.con/");
like($res->{head}, qr/Status: 404 Not Found/, "mid html miss");
- $res = cgi_run("/test/f/blahblah\@example.com/");
+ $res = cgi_run("/test/blahblah\@example.com/f/");
like($res->{body}, qr/\A<html>/, "mid html");
like($res->{head}, qr/Status: 200 OK/, "200 response");
- $res = cgi_run("/test/f/blahblah\@example.con/");
+ $res = cgi_run("/test/blahblah\@example.con/f/");
like($res->{head}, qr/Status: 404 Not Found/, "mid html miss");
$res = cgi_run("/test/");
@@ -183,7 +183,7 @@ EOF
{
local $ENV{HOME} = $home;
local $ENV{PATH} = $main_path;
- my $path = "/test/t/blahblah%40example.com/mbox.gz";
+ my $path = "/test/blahblah%40example.com/t.mbox.gz";
my $res = cgi_run($path);
like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled");
my $indexed = system($index, $maindir) == 0;
@@ -203,7 +203,7 @@ EOF
my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed;
if ($have_xml_feed) {
- $path = "/test/t/blahblah%40example.com/atom";
+ $path = "/test/blahblah%40example.com/t.atom";
$res = cgi_run($path);
like($res->{head}, qr/^Status: 200 /, "atom returned 200");
like($res->{head}, qr!^Content-Type: application/xml!m,
diff --git a/t/feed.t b/t/feed.t
index a9955f0..e4ec752 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -77,7 +77,7 @@ EOF
}
unlike($feed, qr/drop me/, "long quoted text dropped");
- like($feed, qr!/f/\d%40example\.com/#q!,
+ like($feed, qr!/\d%40example\.com/f/#q!,
"/f/ url generated for long quoted text");
like($feed, qr/inline me here/, "short quoted text kept");
like($feed, qr/keep me/, "unquoted text saved");
diff --git a/t/plack.t b/t/plack.t
index 50c9e60..067a593 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -88,7 +88,7 @@ EOF
is(200, $res->code, 'success response received');
like($res->content, qr!href="\Q$atomurl\E"!,
'atom URL generated');
- like($res->content, qr!href="m/blah%40example\.com/"!,
+ like($res->content, qr!href="blah%40example\.com/"!,
'index generated');
});
@@ -98,14 +98,14 @@ EOF
my $res = $cb->(GET($pfx . '/atom.xml'));
is(200, $res->code, 'success response received for atom');
like($res->content,
- qr!link\s+href="\Q$pfx\E/m/blah%40example\.com/"!s,
+ qr!link\s+href="\Q$pfx\E/blah%40example\.com/"!s,
'atom feed generated correct URL');
});
- foreach my $t (qw(f m)) {
+ foreach my $t (('', 'f/')) {
test_psgi($app, sub {
my ($cb) = @_;
- my $path = "/$t/blah%40example.com/";
+ my $path = "/blah%40example.com/$t";
my $res = $cb->(GET($pfx . $path));
is(200, $res->code, "success for $path");
like($res->content, qr!<title>hihi - Me</title>!,
@@ -114,8 +114,8 @@ EOF
}
test_psgi($app, sub {
my ($cb) = @_;
- my $res = $cb->(GET($pfx . '/m/blah%40example.com/raw'));
- is(200, $res->code, 'success response received for /m/*/raw');
+ my $res = $cb->(GET($pfx . '/blah%40example.com/raw'));
+ is(200, $res->code, 'success response received for /*/raw');
like($res->content, qr!\AFrom !, "mbox returned");
});
@@ -126,18 +126,25 @@ EOF
my $res = $cb->(GET($pfx . "/$t/blah%40example.com.txt"));
is(301, $res->code, "redirect for old $t .txt link");
my $location = $res->header('Location');
- like($location, qr!/$t/blah%40example\.com/raw\z!,
+ like($location, qr!/blah%40example\.com/raw\z!,
".txt redirected to /raw");
});
}
- foreach my $t (qw(m f t)) {
+
+ my %umap = (
+ 'm' => '',
+ 'f' => 'f/',
+ 't' => 't/',
+ );
+ while (my ($t, $e) = each %umap) {
test_psgi($app, sub {
my ($cb) = @_;
my $res = $cb->(GET($pfx . "/$t/blah%40example.com.html"));
is(301, $res->code, "redirect for old $t .html link");
my $location = $res->header('Location');
- like($location, qr!/$t/blah%40example\.com/(?:#u)?\z!,
- ".html redirected to /raw");
+ like($location,
+ qr!/blah%40example\.com/$e(?:#u)?\z!,
+ ".html redirected to new location");
});
}
foreach my $sfx (qw(mbox mbox.gz)) {
@@ -146,8 +153,9 @@ EOF
my $res = $cb->(GET($pfx . "/t/blah%40example.com.$sfx"));
is(301, $res->code, 'redirect for old thread link');
my $location = $res->header('Location');
- like($location, qr!/t/blah%40example\.com/mbox\.gz\z!,
- "$sfx redirected to /mbox.gz");
+ like($location,
+ qr!/blah%40example\.com/t\.mbox(?:\.gz)?\z!,
+ "$sfx redirected to /mbox.gz");
});
}
}
diff --git a/t/view.t b/t/view.t
index 77cf3a3..83823d8 100644
--- a/t/view.t
+++ b/t/view.t
@@ -44,17 +44,17 @@ EOF
my $html = PublicInbox::View::msg_html(undef, $mime);
# ghetto tests
- like($html, qr!<a\nhref="\.\./\.\./m/hello%40!s, "MID link present");
+ like($html, qr!<a\nhref="\.\./raw"!s, "raw link present");
like($html, qr/hello world\b/, "body present");
like($html, qr/> keep this inline/, "short quoted text is inline");
like($html, qr/<a\nid=[^>]+><\/a>> Long and wordy/,
"long quoted text is anchored");
# short page
- my $pfx = "../../f/hello%40example.com/";
+ my $pfx = "../hello%40example.com/f/";
$mime = Email::MIME->new($s);
my $short = PublicInbox::View::msg_html(undef, $mime, $pfx);
- like($short, qr!<a\nhref="\.\./\.\./f/hello%40example\.com/!s,
+ like($short, qr!<a\nhref="\.\./hello%40example\.com/f/!s,
"MID link present");
like($short, qr/\n> keep this inline/,
"short quoted text is inline");
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 10/11] view: drop extra '</a>' tag
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (7 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 09/11] completely revamp URL structure to shorten permalinks Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
9 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
Oops.
---
lib/PublicInbox/View.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 2be16b4..45f559e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -380,7 +380,7 @@ sub thread_inline {
my $nr = $res->{total};
if ($nr <= 1) {
- $$dst .= "\n[no followups, yet]</a>\n";
+ $$dst .= "\n[no followups, yet]\n";
return;
}
my $upfx = $full_pfx ? '' : '../';
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 11/11] view: more robust link generation
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
` (8 preceding siblings ...)
2015-09-01 8:55 ` [PATCH 10/11] view: drop extra '</a>' tag Eric Wong
@ 2015-09-01 8:55 ` Eric Wong
2015-09-01 9:08 ` [PATCH 12/11] view: add missing space Eric Wong
9 siblings, 1 reply; 13+ messages in thread
From: Eric Wong @ 2015-09-01 8:55 UTC (permalink / raw)
To: meta
We must avoid double-escaping URLs that are anchored by "<>" in the
plain text, as is the common (and AFAIK recommended) convention.
So we use a two-step linkification process: URLs are first replaced
with salted placeholders, the text is HTML-escaped, and only then are
the placeholders expanded into links.
---
lib/PublicInbox/View.pm | 62 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 45f559e..3d7ba6f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -10,7 +10,8 @@ use Encode::MIME::Header;
use Email::MIME::ContentType qw/parse_content_type/;
use PublicInbox::Hval;
use PublicInbox::MID qw/mid_clean mid_compress mid2path/;
-use Digest::SHA;
+use Digest::SHA qw/sha1_hex/;
+my $SALT = rand;
require POSIX;
# TODO: make these constants tunable
@@ -235,10 +236,35 @@ my $LINK_RE = qr!\b((?:ftp|https?|nntp)://
[\@:\w\.-]+/
?[\@\w\+\&\?\.\%\;/#=-]*)!x;
-sub linkify {
- # no newlines added here since it'd break the splitting we do
- # to fold quotes
- $_[0] =~ s!$LINK_RE!<a\nhref="$1">$1</a>!g;
+sub linkify_1 {
+ my ($link_map, $s) = @_;
+ $s =~ s!$LINK_RE!
+ my $url = $1;
+ # salt this, as this could be exploited to show
+ # links in the HTML which don't show up in the raw mail.
+ my $key = sha1_hex($url . $SALT);
+ $link_map->{$key} = $url;
+ 'PI-LINK-'. $key;
+ !ge;
+ $s;
+}
+
+sub linkify_2 {
+ my ($link_map, $s) = @_;
+
+ # Added "PI-LINK-" prefix to avoid false-positives on git commits
+ $s =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+ my $key = $1;
+ my $url = $link_map->{$key};
+ if (defined $url) {
+ $url = ascii_html($url);
+ "<a\nhref=\"$url\">$url</a>";
+ } else {
+ # false positive or somebody tried to mess with us
+ $key;
+ }
+ !ge;
+ $s;
}
sub flush_quote {
@@ -247,13 +273,15 @@ sub flush_quote {
if ($full_pfx) {
if (!$final && scalar(@$quot) <= MAX_INLINE_QUOTED) {
# show quote inline
- my $rv = join('', map { linkify($_); $_ } @$quot);
+ my %l;
+ my $rv = join('', map { linkify_1(\%l, $_) } @$quot);
@$quot = ();
- return $rv;
+ $rv = ascii_html($rv);
+ return linkify_2(\%l, $rv);
}
# show a short snippet of quoted text and link to full version:
- @$quot = map { s/^(?:>\s*)+//gm; $_ } @$quot;
+ @$quot = map { s/^(?:>\s*)+//gm; $_ } @$quot;
my $cur = join(' ', @$quot);
@$quot = split(/\s+/, $cur);
$cur = '';
@@ -268,16 +296,19 @@ sub flush_quote {
} while (@$quot && length($cur) < MAX_TRUNC_LEN);
@$quot = ();
$cur =~ s/ \z/ .../s;
+ $cur = ascii_html($cur);
my $nr = ++$$n;
"> [<a\nhref=\"$full_pfx#q${part_nr}_$nr\">$cur</a>]\n";
} else {
# show everything in the full version with anchor from
# short version (see above)
my $nr = ++$$n;
- my $rv = "<a\nid=q${part_nr}_$nr></a>";
- $rv .= join('', map { linkify($_); $_ } @$quot);
+ my $rv = "";
+ my %l;
+ $rv .= join('', map { linkify_1(\%l, $_) } @$quot);
@$quot = ();
- $rv;
+ $rv = ascii_html($rv);
+ "<a\nid=q${part_nr}_$nr></a>" . linkify_2(\%l, $rv);
}
}
@@ -297,7 +328,6 @@ sub add_text_body {
my $s = $part->body;
$part->body_set('');
$s = $enc->decode($s);
- $s = ascii_html($s);
my @lines = split(/^/m, $s);
$s = '';
@@ -309,7 +339,7 @@ sub add_text_body {
my @quot;
while (defined(my $cur = shift @lines)) {
- if ($cur !~ /^>/) {
+ if ($cur !~ /^>/) {
# show the previously buffered quote inline
if (scalar @quot) {
$s .= flush_quote(\@quot, \$n, $$part_nr,
@@ -317,8 +347,10 @@ sub add_text_body {
}
# regular line, OK
- linkify($cur);
- $s .= $cur;
+ my %l;
+ $cur = linkify_1(\%l, $cur);
+ $cur = ascii_html($cur);
+ $s .= linkify_2(\%l, $cur);
} else {
push @quot, $cur;
}
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 12/11] view: add missing space
2015-09-01 8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
@ 2015-09-01 9:08 ` Eric Wong
0 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 9:08 UTC (permalink / raw)
To: meta
This fixes a regression introduced in
commit 1b4b2c7b8b2f2df8f114617d2e875eaf5c839ce0
("completely revamp URL structure to shorten permalinks")
---
lib/PublicInbox/View.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 3d7ba6f..29888f9 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -495,7 +495,7 @@ sub html_footer {
my $srch = $ctx->{srch} if $ctx;
my $upfx = $full_pfx ? '../' : '../../';
- my $idx = $standalone ? "<a\nhref=\"$upfx\">index</a>" : '';
+ my $idx = $standalone ? " <a\nhref=\"$upfx\">index</a>" : '';
if ($idx && $srch) {
my $next = thread_inline(\$idx, $ctx, $mime, $full_pfx);
$irt = $mime->header('In-Reply-To');
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [13/11 PATCH] feed: fix <updated> tag in Atom feed
2015-09-01 8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
@ 2015-09-01 9:30 ` Eric Wong
0 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2015-09-01 9:30 UTC (permalink / raw)
To: meta
Fixes commit d44ed46ee92c78aaaed64975c4d6846613963be4
("implement per-thread Atom feeds")
---
lib/PublicInbox/Feed.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 4420fde..9615880 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -331,6 +331,7 @@ sub add_to_feed {
my $date = $header_obj->header('Date');
$date = PublicInbox::Hval->new_oneline($date);
$date = feed_date($date->raw) or return 0;
+ $date = "<updated>$date</updated>";
my $title = mime_header($header_obj, 'Subject') or return 0;
$title = title_tag($title);
@@ -342,8 +343,7 @@ sub add_to_feed {
$email = PublicInbox::Hval->new_oneline($email)->as_html;
if (delete $feed_opts->{emit_header}) {
- $fh->write(atom_header($feed_opts, $title) .
- "<updated>$date</updated>");
+ $fh->write(atom_header($feed_opts, $title) . $date);
}
$fh->write("<entry><author><name>$name</name><email>$email</email>" .
"</author>$title$date" .
--
EW
^ permalink raw reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2015-09-01 9:30 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-01 8:55 [PATCH 01/11] search: reduce redundant doc data Eric Wong
2015-09-01 8:55 ` [PATCH 02/11] search: allow querying all mail with '' Eric Wong
2015-09-01 8:55 ` [PATCH 03/11] search: show newest results first Eric Wong
2015-09-01 8:55 ` [PATCH 04/11] feed: use updated date based on git commit date Eric Wong
2015-09-01 8:55 ` [PATCH 05/11] feed: extract atom header generation Eric Wong
2015-09-01 8:55 ` [PATCH 06/11] implement per-thread Atom feeds Eric Wong
2015-09-01 9:30 ` [13/11 PATCH] feed: fix <updated> tag in Atom feed Eric Wong
2015-09-01 8:55 ` [PATCH 07/11] www: compile mbox regexp only once Eric Wong
2015-09-01 8:55 ` [PATCH 08/11] www: root atom feed is "new.atom" and not "atom.xml" Eric Wong
2015-09-01 8:55 ` [PATCH 09/11] completely revamp URL structure to shorten permalinks Eric Wong
2015-09-01 8:55 ` [PATCH 10/11] view: drop extra '</a>' tag Eric Wong
2015-09-01 8:55 ` [PATCH 11/11] view: more robust link generation Eric Wong
2015-09-01 9:08 ` [PATCH 12/11] view: add missing space Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).