unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] www: attempt to handle Message-IDs with slashes
@ 2015-09-03  4:43 Eric Wong
  2015-09-03  7:41 ` [PATCH 2/1] " Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2015-09-03  4:43 UTC (permalink / raw)
  To: meta

Unfortunately, some HTTP servers will try to be clever
with %2F and unescape it to '/', making life difficult for
us.  Fortunately, not many Message-IDs have slashes in
them.
---
 lib/PublicInbox/WWW.pm | 57 ++++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11b5402..6eebf62 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -17,12 +17,13 @@ use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
+our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $pi_config;
 
 sub run {
 	my ($cgi, $method) = @_;
 	$pi_config ||= PublicInbox::Config->new;
-	my %ctx = (cgi => $cgi, pi_config => $pi_config);
+	my $ctx = { cgi => $cgi, pi_config => $pi_config };
 	if ($method !~ /\AGET|HEAD\z/) {
 		return r(405, 'Method Not Allowed');
 	}
@@ -32,40 +33,26 @@ sub run {
 	if ($path_info eq '/') {
 		r404();
 	} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
-		invalid_list(\%ctx, $1) || r301(\%ctx, $1);
+		invalid_list($ctx, $1) || r301($ctx, $1);
 	} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
-		invalid_list(\%ctx, $1) || get_index(\%ctx);
+		invalid_list($ctx, $1) || get_index($ctx);
 	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
-		invalid_list(\%ctx, $1) || get_atom(\%ctx);
+		invalid_list($ctx, $1) || get_atom($ctx);
 
-	# thread display
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) {
-		my $sfx = $3;
-		invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/T/\z!o) {
-		$ctx{flat} = 1;
-		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
-	# single-message pages
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
+		msg_page($ctx, $1, $2, $3);
 
-	# full-message page
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
+	# some Message-IDs have slashes in them and the HTTP server
+	# may try to be clever and unescape them :<
+	} elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) {
+		msg_page($ctx, $1, $2, $3);
 
 	# convenience redirects order matters
 	} elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
-		r301(\%ctx, $1, $2);
+		r301($ctx, $1, $2);
 
 	} else {
-		legacy_redirects(\%ctx, $path_info);
+		legacy_redirects($ctx, $path_info);
 	}
 }
 
@@ -203,10 +190,11 @@ sub get_full_html {
 
 # /$LISTNAME/$MESSAGE_ID/t/
 sub get_thread {
-	my ($ctx) = @_;
+	my ($ctx, $flat) = @_;
 	my $srch = searcher($ctx) or return need_search($ctx);
 	require PublicInbox::View;
 	my $foot = footer($ctx);
+	$ctx->{flat} = $flat;
 	PublicInbox::View::thread_html($ctx, $foot, $srch);
 }
 
@@ -390,4 +378,19 @@ sub r301 {
 	  [ "Redirecting to $url\n" ] ]
 }
 
+sub msg_page {
+	my ($ctx, $list, $mid, $e) = @_;
+	unless (invalid_list_mid($ctx, $list, $mid)) {
+		'' eq $e and return get_mid_html($ctx);
+		't/' eq $e and return get_thread($ctx);
+		't.atom' eq $e and return get_thread_atom($ctx);
+		't.mbox' eq $e and return get_thread_mbox($ctx);
+		't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz');
+		'T/' eq $e and return get_thread($ctx, 1);
+		'raw' eq $e and return get_mid_txt($ctx);
+		'f/' eq $e and return get_full_html($ctx);
+	}
+	r404($ctx);
+}
+
 1;
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/1] www: attempt to handle Message-IDs with slashes
  2015-09-03  4:43 [PATCH] www: attempt to handle Message-IDs with slashes Eric Wong
@ 2015-09-03  7:41 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2015-09-03  7:41 UTC (permalink / raw)
  To: meta

Unfortunately, some HTTP servers will try to be clever
with %2F and unescape it to '/', making life difficult for
us.  Fortunately, not many Message-IDs have slashes in
them.
---
  Whee! Fix one problem and five more pop up!

 lib/PublicInbox/WWW.pm | 57 ++++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11b5402..6eebf62 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -17,12 +17,13 @@ use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
+our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $pi_config;
 
 sub run {
 	my ($cgi, $method) = @_;
 	$pi_config ||= PublicInbox::Config->new;
-	my %ctx = (cgi => $cgi, pi_config => $pi_config);
+	my $ctx = { cgi => $cgi, pi_config => $pi_config };
 	if ($method !~ /\AGET|HEAD\z/) {
 		return r(405, 'Method Not Allowed');
 	}
@@ -32,40 +33,26 @@ sub run {
 	if ($path_info eq '/') {
 		r404();
 	} elsif ($path_info =~ m!$LISTNAME_RE\z!o) {
-		invalid_list(\%ctx, $1) || r301(\%ctx, $1);
+		invalid_list($ctx, $1) || r301($ctx, $1);
 	} elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) {
-		invalid_list(\%ctx, $1) || get_index(\%ctx);
+		invalid_list($ctx, $1) || get_index($ctx);
 	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
-		invalid_list(\%ctx, $1) || get_atom(\%ctx);
+		invalid_list($ctx, $1) || get_atom($ctx);
 
-	# thread display
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) {
-		my $sfx = $3;
-		invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/T/\z!o) {
-		$ctx{flat} = 1;
-		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
-	# single-message pages
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
+	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
+		msg_page($ctx, $1, $2, $3);
 
-	# full-message page
-	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
+	# some Message-IDs have slashes in them and the HTTP server
+	# may try to be clever and unescape them :<
+	} elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) {
+		msg_page($ctx, $1, $2, $3);
 
 	# convenience redirects order matters
 	} elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) {
-		r301(\%ctx, $1, $2);
+		r301($ctx, $1, $2);
 
 	} else {
-		legacy_redirects(\%ctx, $path_info);
+		legacy_redirects($ctx, $path_info);
 	}
 }
 
@@ -203,10 +190,11 @@ sub get_full_html {
 
 # /$LISTNAME/$MESSAGE_ID/t/
 sub get_thread {
-	my ($ctx) = @_;
+	my ($ctx, $flat) = @_;
 	my $srch = searcher($ctx) or return need_search($ctx);
 	require PublicInbox::View;
 	my $foot = footer($ctx);
+	$ctx->{flat} = $flat;
 	PublicInbox::View::thread_html($ctx, $foot, $srch);
 }
 
@@ -390,4 +378,19 @@ sub r301 {
 	  [ "Redirecting to $url\n" ] ]
 }
 
+sub msg_page {
+	my ($ctx, $list, $mid, $e) = @_;
+	unless (invalid_list_mid($ctx, $list, $mid)) {
+		'' eq $e and return get_mid_html($ctx);
+		't/' eq $e and return get_thread($ctx);
+		't.atom' eq $e and return get_thread_atom($ctx);
+		't.mbox' eq $e and return get_thread_mbox($ctx);
+		't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz');
+		'T/' eq $e and return get_thread($ctx, 1);
+		'raw' eq $e and return get_mid_txt($ctx);
+		'f/' eq $e and return get_full_html($ctx);
+	}
+	r404($ctx);
+}
+
 1;
-- 
EW


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-09-03  7:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-03  4:43 [PATCH] www: attempt to handle Message-IDs with slashes Eric Wong
2015-09-03  7:41 ` [PATCH 2/1] " Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).