From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, T_RP_MATCHES_RCVD,URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0F0FB200F5 for ; Thu, 3 Sep 2015 04:43:14 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] www: attempt to handle Message-IDs with slashes Date: Thu, 3 Sep 2015 04:43:13 +0000 Message-Id: <1441255393-15621-1-git-send-email-e@80x24.org> List-Id: Unfortunately, some HTTP servers will try to be clever with %2F and unescape it to '/', making life difficult for us. Fortunately, not many Message-IDs have slashes in them. --- lib/PublicInbox/WWW.pm | 57 ++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 11b5402..6eebf62 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -17,12 +17,13 @@ use constant SSOMA_URL => 'http://ssoma.public-inbox.org/'; use constant PI_URL => 'http://public-inbox.org/'; our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; our $MID_RE = qr!([^/]+)!; +our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; our $pi_config; sub run { my ($cgi, $method) = @_; $pi_config ||= PublicInbox::Config->new; - my %ctx = (cgi => $cgi, pi_config => $pi_config); + my $ctx = { cgi => $cgi, pi_config => $pi_config }; if ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); } @@ -32,40 +33,26 @@ sub run { if ($path_info eq '/') { r404(); } elsif ($path_info =~ m!$LISTNAME_RE\z!o) { - invalid_list(\%ctx, $1) || r301(\%ctx, $1); + invalid_list($ctx, $1) || r301($ctx, $1); } elsif ($path_info =~ m!$LISTNAME_RE(?:/|/index\.html)?\z!o) { - invalid_list(\%ctx, $1) || get_index(\%ctx); + 
invalid_list($ctx, $1) || get_index($ctx); } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) { - invalid_list(\%ctx, $1) || get_atom(\%ctx); + invalid_list($ctx, $1) || get_atom($ctx); - # thread display - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t/\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.mbox(\.gz)?\z!o) { - my $sfx = $3; - invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx); - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/t\.atom\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/T/\z!o) { - $ctx{flat} = 1; - invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - - # single-message pages - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/raw\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx); + } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) { + msg_page($ctx, $1, $2, $3); - # full-message page - } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/f/\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx); + # some Message-IDs have slashes in them and the HTTP server + # may try to be clever and unescape them :< + } elsif ($path_info =~ m!$LISTNAME_RE/(\S+/\S+)/$END_RE\z!o) { + msg_page($ctx, $1, $2, $3); # convenience redirects order matters } elsif ($path_info =~ m!$LISTNAME_RE/([^/]{2,})\z!o) { - r301(\%ctx, $1, $2); + r301($ctx, $1, $2); } else { - legacy_redirects(\%ctx, $path_info); + legacy_redirects($ctx, $path_info); } } @@ -203,10 +190,11 @@ sub get_full_html { # /$LISTNAME/$MESSAGE_ID/t/ sub get_thread { - my ($ctx) = @_; + my ($ctx, $flat) = @_; my $srch = searcher($ctx) or return need_search($ctx); require PublicInbox::View; my $foot = footer($ctx); + $ctx->{flat} = $flat; PublicInbox::View::thread_html($ctx, $foot, $srch); } @@ -390,4 +378,19 
@@ sub r301 { [ "Redirecting to $url\n" ] ] } +sub msg_page { + my ($ctx, $list, $mid, $e) = @_; + unless (invalid_list_mid($ctx, $list, $mid)) { + '' eq $e and return get_mid_html($ctx); + 't/' eq $e and return get_thread($ctx); + 't.atom' eq $e and return get_thread_atom($ctx); + 't.mbox' eq $e and return get_thread_mbox($ctx); + 't.mbox.gz' eq $e and return get_thread_mbox($ctx, '.gz'); + 'T/' eq $e and return get_thread($ctx, 1); + 'raw' eq $e and return get_mid_txt($ctx); + 'f/' eq $e and return get_full_html($ctx); + } + r404($ctx); +} + 1; -- EW