* [PATCH] www: attempt to handle Message-IDs with slashes
@ 2015-09-03 4:43 Eric Wong
2015-09-03 7:41 ` [PATCH 2/1] " Eric Wong
0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2015-09-03 4:43 UTC (permalink / raw)
To: meta
Unfortunately, some HTTP servers will try to be clever
with %2F and unescape it to '/', making life difficult for
us. Fortunately, not many Message-IDs have slashes in
them.
---
lib/PublicInbox/WWW.pm | 57 ++++++++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 27 deletions(-)
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11b5402..6eebf62 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -17,12 +17,13 @@ use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
use constant PI_URL => 'http://public-inbox.org/';
our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
our $MID_RE = qr!([^/]+)!;
+our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
our $pi_config;
sub run {
my ($cgi, $method) = @_;
$pi_config ||= PublicInbox::Config->new;
- my %ctx = (cgi => $cgi, pi_config => $pi_config);
+ my $ctx = { cgi => $cgi, pi_config => $pi_config };
if ($method !~ /\AGET|HEAD\z/) {
return r(405, 'Method Not Allowed');
}
@@ -32,40 +33,26 @@ sub run {
if ($path_info eq '/') {
r404();
} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
- invalid_list(\%ctx, $1) || r301(\%ctx, $1);
+ invalid_list($ctx, $1) || r301($ctx, $1);
} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
- invalid_list(\%ctx, $1) || get_index(\%ctx);
+ invalid_list($ctx, $1) || get_index($ctx);
} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
- invalid_list(\%ctx, $1) || get_atom(\%ctx);
+ invalid_list($ctx, $1) || get_atom($ctx);
- # thread display
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) {
- my $sfx = $3;
- invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/T/\z!o) {
- $ctx{flat} = 1;
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
- # single-message pages
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
- # full-message page
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
+ # some Message-IDs have slashes in them and the HTTP server
+ # may try to be clever and unescape them :<
+ } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
# convenience redirects order matters
} elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
- r301(\%ctx, $1, $2);
+ r301($ctx, $1, $2);
} else {
- legacy_redirects(\%ctx, $path_info);
+ legacy_redirects($ctx, $path_info);
}
}
@@ -203,10 +190,11 @@ sub get_full_html {
# /$LISTNAME/$MESSAGE_ID/t/
sub get_thread {
- my ($ctx) = @_;
+ my ($ctx, $flat) = @_;
my $srch = searcher($ctx) or return need_search($ctx);
require PublicInbox::View;
my $foot = footer($ctx);
+ $ctx->{flat} = $flat;
PublicInbox::View::thread_html($ctx, $foot, $srch);
}
@@ -390,4 +378,19 @@ sub r301 {
[ "Redirecting to $url\n" ] ]
}
+sub msg_page {
+ my ($ctx, $list, $mid, $e) = @_;
+ unless (invalid_list_mid($ctx, $list, $mid)) {
+ '' eq $e and return get_mid_html($ctx);
+ 't/' eq $e and return get_thread($ctx);
+ 't.atom' eq $e and return get_thread_atom($ctx);
+ 't.mbox' eq $e and return get_thread_mbox($ctx);
+ 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz');
+ 'T/' eq $e and return get_thread($ctx, 1);
+ 'raw' eq $e and return get_mid_txt($ctx);
+ 'f/' eq $e and return get_full_html($ctx);
+ }
+ r404($ctx);
+}
+
1;
--
EW
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 2/1] www: attempt to handle Message-IDs with slashes
2015-09-03 4:43 [PATCH] www: attempt to handle Message-IDs with slashes Eric Wong
@ 2015-09-03 7:41 ` Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2015-09-03 7:41 UTC (permalink / raw)
To: meta
Unfortunately, some HTTP servers will try to be clever
with %2F and unescape it to '/', making life difficult for
us. Fortunately, not many Message-IDs have slashes in
them.
---
Whee! Fix one problem and five more pop up!
lib/PublicInbox/WWW.pm | 57 ++++++++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 27 deletions(-)
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11b5402..6eebf62 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -17,12 +17,13 @@ use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
use constant PI_URL => 'http://public-inbox.org/';
our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
our $MID_RE = qr!([^/]+)!;
+our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
our $pi_config;
sub run {
my ($cgi, $method) = @_;
$pi_config ||= PublicInbox::Config->new;
- my %ctx = (cgi => $cgi, pi_config => $pi_config);
+ my $ctx = { cgi => $cgi, pi_config => $pi_config };
if ($method !~ /\AGET|HEAD\z/) {
return r(405, 'Method Not Allowed');
}
@@ -32,40 +33,26 @@ sub run {
if ($path_info eq '/') {
r404();
} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
- invalid_list(\%ctx, $1) || r301(\%ctx, $1);
+ invalid_list($ctx, $1) || r301($ctx, $1);
} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
- invalid_list(\%ctx, $1) || get_index(\%ctx);
+ invalid_list($ctx, $1) || get_index($ctx);
} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
- invalid_list(\%ctx, $1) || get_atom(\%ctx);
+ invalid_list($ctx, $1) || get_atom($ctx);
- # thread display
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) {
- my $sfx = $3;
- invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/T/\z!o) {
- $ctx{flat} = 1;
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
- # single-message pages
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
+ } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
- # full-message page
- } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
+ # some Message-IDs have slashes in them and the HTTP server
+ # may try to be clever and unescape them :<
+ } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) {
+ msg_page($ctx, $1, $2, $3);
# convenience redirects order matters
} elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
- r301(\%ctx, $1, $2);
+ r301($ctx, $1, $2);
} else {
- legacy_redirects(\%ctx, $path_info);
+ legacy_redirects($ctx, $path_info);
}
}
@@ -203,10 +190,11 @@ sub get_full_html {
# /$LISTNAME/$MESSAGE_ID/t/
sub get_thread {
- my ($ctx) = @_;
+ my ($ctx, $flat) = @_;
my $srch = searcher($ctx) or return need_search($ctx);
require PublicInbox::View;
my $foot = footer($ctx);
+ $ctx->{flat} = $flat;
PublicInbox::View::thread_html($ctx, $foot, $srch);
}
@@ -390,4 +378,19 @@ sub r301 {
[ "Redirecting to $url\n" ] ]
}
+sub msg_page {
+ my ($ctx, $list, $mid, $e) = @_;
+ unless (invalid_list_mid($ctx, $list, $mid)) {
+ '' eq $e and return get_mid_html($ctx);
+ 't/' eq $e and return get_thread($ctx);
+ 't.atom' eq $e and return get_thread_atom($ctx);
+ 't.mbox' eq $e and return get_thread_mbox($ctx);
+ 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz');
+ 'T/' eq $e and return get_thread($ctx, 1);
+ 'raw' eq $e and return get_mid_txt($ctx);
+ 'f/' eq $e and return get_full_html($ctx);
+ }
+ r404($ctx);
+}
+
1;
--
EW
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-09-03 7:41 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-03 4:43 [PATCH] www: attempt to handle Message-IDs with slashes Eric Wong
2015-09-03 7:41 ` [PATCH 2/1] " Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).