unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] support dumping thread as an mbox
@ 2015-08-21 10:26 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2015-08-21 10:26 UTC (permalink / raw)
  To: meta

Some folks may not want to download and install Perl code like
ssoma, so allow downloading an mbox containing the entire
thread.
---
 lib/PublicInbox/Mbox.pm | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WWW.pm  | 11 ++++++++++
 public-inbox.cgi        | 43 ++++++++++++++++++++++++++------------
 t/cgi.t                 | 19 +++++++++++++++++
 4 files changed, 115 insertions(+), 13 deletions(-)
 create mode 100644 lib/PublicInbox/Mbox.pm

diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
new file mode 100644
index 0000000..2ec5065
--- /dev/null
+++ b/lib/PublicInbox/Mbox.pm
@@ -0,0 +1,55 @@
+# Copyright (C) 2015, all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Streaming interface for formatting messages as an mbox
+package PublicInbox::Mbox;
+use strict;
+use warnings;
+use PublicInbox::MID qw/mid_clean mid_compressed mid2path/;
+use Fcntl qw(SEEK_SET);
+
+sub thread_mbox {
+	my ($ctx, $srch) = @_;
+	my $mid = mid_compressed($ctx->{mid});
+	my $res = $srch->get_thread($mid);
+	my $msgs = delete $res->{msgs};
+	require PublicInbox::GitCatFile;
+	require Email::Simple;
+	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+
+	sub {
+		my ($res) = @_; # Plack callback
+		my $w = $res->([200, [ 'Content-Type' => 'text/plain' ] ]);
+		while (defined(my $smsg = shift @$msgs)) {
+			my $msg = eval {
+				my $path = 'HEAD:' . mid2path($smsg->mid);
+				Email::Simple->new($git->cat_file($path));
+			};
+			emit($w, $msg) if $msg;
+		}
+	}
+}
+
+sub emit {
+	my ($fh, $simple) = @_; # Email::Simple object
+
+	# drop potentially confusing headers, ssoma already should've dropped
+	# Lines and Content-Length
+	foreach my $d (qw(Lines Content-Length Status)) {
+		$simple->header_set($d);
+	}
+
+	my $buf = $simple->header_obj->as_string;
+	unless ($buf =~ /\AFrom /) {
+		$fh->write("From a\@a Thu Jan  1 00:00:00 1970\n");
+	}
+	$fh->write($buf .= $simple->crlf);
+
+	$buf = $simple->body;
+	$simple->body_set('');
+	$buf =~ s/^(From )/>$1/gm;
+	$buf .= "\n" unless $buf =~ /\n\z/s;
+
+	$fh->write($buf);
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index b0c1348..cd8a570 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -53,6 +53,9 @@ sub run {
 	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx, $cgi);
 
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox\z!o) {
+		invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $cgi);
+
 	} elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) ||
 			redirect_mid_txt(\%ctx, $cgi);
@@ -326,4 +329,12 @@ sub msg_pfx {
 	"../f/$href.html";
 }
 
+# /$LISTNAME/t/$MESSAGE_ID.mbox                    -> search results as mbox
+sub get_thread_mbox {
+	my ($ctx, $cgi) = @_;
+	my $srch = searcher($ctx) or return need_search($ctx);
+	require PublicInbox::Mbox;
+	PublicInbox::Mbox::thread_mbox($ctx, $srch);
+}
+
 1;
diff --git a/public-inbox.cgi b/public-inbox.cgi
index ed0f12c..5c6dfe8 100755
--- a/public-inbox.cgi
+++ b/public-inbox.cgi
@@ -3,12 +3,22 @@
 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
 use strict;
 use warnings;
+use IO::Handle;
 require PublicInbox::WWW;
 use CGI qw/-nosticky/;
 our $NO_SCRIPT_NAME;
+our %HTTP_CODES;
 BEGIN {
 	$NO_SCRIPT_NAME = 1 if $ENV{NO_SCRIPT_NAME};
 	CGI->compile if $ENV{MOD_PERL};
+
+	%HTTP_CODES = (
+		200 => 'OK',
+		301 => 'Moved Permanently',
+		404 => 'Not Found',
+		405 => 'Method Not Allowed',
+		501 => 'Not Implemented',
+	);
 }
 
 # some servers (Ruby webrick) include scheme://host[:port] here,
@@ -19,23 +29,30 @@ delete $ENV{REQUEST_URI};
 $ENV{SCRIPT_NAME} = '' if $NO_SCRIPT_NAME;
 my $req = CGI->new;
 my $ret = PublicInbox::WWW::run($req, $req->request_method);
-binmode STDOUT;
+
+my $out = select;
+$out->binmode;
 if (@ARGV && $ARGV[0] eq 'static') {
-	print $ret->[2]->[0]; # only show the body
+	$out->write($ret->[2]->[0]); # only show the body
 } else { # CGI
-	my ($status, $headers, $body) = @$ret;
-	my %codes = (
-		200 => 'OK',
-		301 => 'Moved Permanently',
-		404 => 'Not Found',
-		405 => 'Method Not Allowed',
-		501 => 'Not Implemented',
-	);
+	if (ref($ret) eq 'CODE') {
+		$ret->(*dump_header);
+	} else {
+		my ($status, $headers, $body) = @$ret;
+
+		dump_header([$status, $headers])->write($body->[0]);
+	}
+}
 
-	print "Status: $status $codes{$status}\r\n";
+sub dump_header {
+	my ($res) = @_;
+	my $fh = select;
+	my ($status, $headers) = @$res;
+	$fh->write("Status: $status $HTTP_CODES{$status}\r\n");
 	my @tmp = @$headers;
 	while (my ($k, $v) = splice(@tmp, 0, 2)) {
-		print "$k: $v\r\n";
+		$fh->write("$k: $v\r\n");
 	}
-	print "\r\n", $body->[0];
+	$fh->write("\r\n");
+	$fh;
 }
diff --git a/t/cgi.t b/t/cgi.t
index a16ebd8..2747a15 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -10,6 +10,7 @@ use IPC::Run qw/run/;
 
 use constant CGI => "blib/script/public-inbox.cgi";
 my $mda = "blib/script/public-inbox-mda";
+my $index = "blib/script/public-inbox-index";
 my $tmpdir = tempdir(CLEANUP => 1);
 my $home = "$tmpdir/pi-home";
 my $pi_home = "$home/.public-inbox";
@@ -178,6 +179,24 @@ EOF
 		"slashy URL generated correctly");
 }
 
+# retrieve thread as an mbox
+{
+	local $ENV{HOME} = $home;
+	local $ENV{PATH} = $main_path;
+	my $path = "/test/t/blahblah%40example.com.mbox";
+	my $res = cgi_run($path);
+	like($res->{head}, qr/^Status: 501 /, "search not-yet-enabled");
+	my $indexed = system($index, $maindir) == 0;
+	if ($indexed) {
+		$res = cgi_run($path);
+		# use Data::Dumper; print STDERR Dumper($res);
+		like($res->{head}, qr/^Status: 200 /, "search returned mbox");
+		like($res->{body}, qr/^From /m, "From lines in mbox");
+	} else {
+		like($res->{head}, qr/^Status: 501 /, "search not available");
+	}
+}
+
 # redirect list-name-only URLs
 {
 	local $ENV{HOME} = $home;
-- 
EW


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2015-08-21 10:26 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-08-21 10:26 [PATCH] support dumping thread as an mbox Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).