unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/6] www: support downloading attachments
Date: Thu, 19 May 2016 21:28:44 +0000	[thread overview]
Message-ID: <20160519212847.4822-3-e@80x24.org> (raw)
In-Reply-To: <20160519212847.4822-1-e@80x24.org>

This can be useful for lists where the convention is to
attach patches (rather than inline them in the message body).
---
 lib/PublicInbox/Feed.pm      |  6 +++---
 lib/PublicInbox/View.pm      | 39 ++++++++++++++++++++++---------------
 lib/PublicInbox/WWW.pm       | 11 +++++++++++
 lib/PublicInbox/WwwAttach.pm | 46 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 19 deletions(-)
 create mode 100644 lib/PublicInbox/WwwAttach.pm

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 0b864c2..e2df97b 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -315,10 +315,10 @@ sub add_to_feed {
 	my $mid = $header_obj->header_raw('Message-ID');
 	defined $mid or return 0;
 	$mid = PublicInbox::Hval->new_msgid($mid);
-	my $href = $mid->as_href;
+	my $href = $midurl.$mid->as_href;
 
 	my $content = qq(<pre\nstyle="white-space:pre-wrap">) .
-		PublicInbox::View::multipart_text_as_html($mime) .
+		PublicInbox::View::multipart_text_as_html($mime, $href) .
 		'</pre>';
 	my $date = $header_obj->header('Date');
 	my $updated = feed_updated($date);
@@ -346,7 +346,7 @@ sub add_to_feed {
 	my $h = '[a-f0-9]';
 	my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
 	my $id = 'urn:uuid:' . join('-', @uuid5);
-	$fh->write(qq!</div></content><link\nhref="$midurl$href/"/>!.
+	$fh->write(qq!</div></content><link\nhref="$href/"/>!.
 		   "<id>$id</id></entry>");
 	1;
 }
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 4260167..326da4c 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -10,7 +10,6 @@ use URI::Escape qw/uri_escape_utf8/;
 use Date::Parse qw/str2time/;
 use Encode qw/find_encoding/;
 use Encode::MIME::Header;
-use Email::MIME::ContentType qw/parse_content_type/;
 use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::Linkify;
 use PublicInbox::MID qw/mid_clean id_compress mid2path mid_mime/;
@@ -27,7 +26,7 @@ sub msg_html {
 	$footer = defined($footer) ? "\n$footer" : '';
 	my $hdr = $mime->header_obj;
 	headers_to_html_header($hdr, $ctx) .
-		multipart_text_as_html($mime) .
+		multipart_text_as_html($mime, '') .
 		'</pre><hr /><pre>' .
 		html_footer($hdr, 1, $ctx, 'R/') .
 		$footer .
@@ -125,7 +124,7 @@ sub index_entry {
 	my $mhref = "${path}$href/";
 
 	# scan through all parts, looking for displayable text
-	msg_iter($mime, sub { index_walk($fh, $_[0]) });
+	msg_iter($mime, sub { index_walk($fh, $mhref, $_[0]) });
 	$rv = "\n" . html_footer($hdr, 0, $ctx, "$path$href/R/");
 
 	if (defined $irt) {
@@ -211,8 +210,8 @@ sub emit_thread_html {
 }
 
 sub index_walk {
-	my ($fh, $p) = @_;
-	my $s = add_text_body($p);
+	my ($fh, $upfx, $p) = @_;
+	my $s = add_text_body($upfx, $p);
 
 	return if $s eq '';
 
@@ -222,13 +221,13 @@ sub index_walk {
 }
 
 sub multipart_text_as_html {
-	my ($mime) = @_;
+	my ($mime, $upfx) = @_;
 	my $rv = "";
 
 	# scan through all parts, looking for displayable text
 	msg_iter($mime, sub {
 		my ($p) = @_;
-		$p = add_text_body($p);
+		$p = add_text_body($upfx, $p);
 		$rv .= $p;
 		$rv .= "\n" if $p ne '';
 	});
@@ -249,8 +248,8 @@ sub flush_quote {
 	$$s .= qq(<span\nclass="q">) . $rv . '</span>'
 }
 
-sub attach_link ($$$) {
-	my ($ct, $p, $fn) = @_;
+sub attach_link ($$$$) {
+	my ($upfx, $ct, $p, $fn) = @_;
 	my ($part, $depth, @idx) = @$p;
 	my $nl = $idx[-1] > 1 ? "\n" : '';
 	my $idx = join('.', @idx);
@@ -262,29 +261,37 @@ sub attach_link ($$$) {
 	$desc = $fn unless defined $desc;
 	$desc = '' unless defined $desc;
 	$desc = ': '.$desc if $desc;
-	"$nl<b>[-- Attachment #$idx$desc --]\n" .
-	"[-- Type: $ct, Size: $size bytes --]</b>"
+	my $sfn;
+	if (defined $fn && $fn =~ /\A[\w-]+\.[a-z0-9]+\z/) {
+		$sfn = $fn;
+	} elsif ($ct eq 'text/plain') {
+		$sfn = 'a.txt';
+	} else {
+		$sfn = 'a.bin';
+	}
+	qq($nl<a\nhref="$upfx$idx-$sfn">[-- Attachment #$idx$desc --]\n) .
+	"[-- Type: $ct, Size: $size bytes --]</a>"
 }
 
 sub add_text_body {
-	my ($p) = @_; # from msg_iter: [ Email::MIME, depth, @idx ]
+	my ($upfx, $p) = @_; # from msg_iter: [ Email::MIME, depth, @idx ]
 	my ($part, $depth, @idx) = @$p;
 	my $ct = $part->content_type;
 	my $fn = $part->filename;
 
 	if (defined $ct && $ct =~ m!\btext/x?html\b!i) {
-		return attach_link($ct, $p, $fn);
+		return attach_link($upfx, $ct, $p, $fn);
 	}
 
 	my $s = eval { $part->body_str };
 
 	# badly-encoded message? tell the world about it!
-	return attach_link($ct, $p, $fn) if $@;
+	return attach_link($upfx, $ct, $p, $fn) if $@;
 
 	my @lines = split(/^/m, $s);
 	$s = '';
-	if (defined($fn) || $depth > 1 || $idx[0] > 1) {
-		$s .= attach_link($ct, $p, $fn);
+	if (defined($fn) || $depth > 0) {
+		$s .= attach_link($upfx, $ct, $p, $fn);
 		$s .= "\n\n";
 	}
 	my @quot;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 465dcb2..f87f417 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -23,6 +23,7 @@ use PublicInbox::GitHTTPBackend;
 our $INBOX_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(T/|t/|R/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $ATTACH_RE = qr!(\d[\.\d]*)-([\w-]+\.[a-z0-9]+)!i;
 
 sub new {
 	my ($class, $pi_config) = @_;
@@ -73,6 +74,10 @@ sub call {
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
 		msg_page($self, $ctx, $1, $2, $3);
 
+	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
+		my ($idx, $fn) = ($3, $4);
+		invalid_inbox_mid($self, $ctx, $1, $2) ||
+			get_attach($ctx, $idx, $fn);
 	# in case people leave off the trailing slash:
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t|R)\z!o) {
 		my ($inbox, $mid, $suffix) = ($1, $2, $3);
@@ -442,4 +447,10 @@ sub news_www {
 	$self->{news_www} = PublicInbox::NewsWWW->new($self->{pi_config});
 }
 
+sub get_attach {
+	my ($ctx, $idx, $fn) = @_;
+	require PublicInbox::WwwAttach;
+	PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn);
+}
+
 1;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
new file mode 100644
index 0000000..5cf56a8
--- /dev/null
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -0,0 +1,46 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# For retrieving attachments from messages in the WWW interface
+package PublicInbox::WwwAttach; # internal package
+use strict;
+use warnings;
+use Email::MIME;
+use Email::MIME::ContentType qw(parse_content_type);
+$Email::MIME::ContentType::STRICT_PARAMS = 0;
+use PublicInbox::MID qw(mid2path);
+use PublicInbox::MsgIter;
+
+# /$LISTNAME/$MESSAGE_ID/$IDX-$FILENAME
+sub get_attach ($$$) {
+	my ($ctx, $idx, $fn) = @_;
+	my $path = mid2path($ctx->{mid});
+
+	my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
+	my $mime = $ctx->{git}->cat_file("HEAD:$path") or return $res;
+	$mime = Email::MIME->new($mime);
+	msg_iter($mime, sub {
+		my ($part, $depth, @idx) = @{$_[0]};
+		return if join('.', @idx) ne $idx;
+		$res->[0] = 200;
+		my $ct = $part->content_type;
+		$ct = parse_content_type($ct) if $ct;
+
+		# discrete == type, we remain Debian wheezy-compatible
+		if ($ct && (($ct->{discrete} || '') eq 'text')) {
+			# display all text as text/plain:
+			my $cset = $ct->{attributes}->{charset};
+			if ($cset && ($cset =~ /\A[\w-]+\z/)) {
+				$res->[1]->[1] .= qq(; charset=$cset);
+			}
+		} else { # TODO: allow user to configure safe types
+			$res->[1]->[1] = 'application/octet-stream';
+		}
+		$part = $part->body;
+		push @{$res->[1]}, 'Content-Length', bytes::length($part);
+		$res->[2]->[0] = $part;
+	});
+	$res;
+}
+
+1;
-- 
EW


  parent reply	other threads:[~2016-05-19 21:28 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-19 21:25 [PATCH 1/7] view: rely on Email::MIME::body_str for decoding Eric Wong
2016-05-19 21:28 ` [PATCH 1/6] msg_iter: new internal API for iterating through MIME Eric Wong
2016-05-19 21:28   ` [PATCH 2/6] switch read-only uses of walk_parts to msg_iter Eric Wong
2016-05-19 21:28   ` Eric Wong [this message]
2016-05-19 21:28   ` [PATCH 4/6] msg_iter: workaround broken Email::MIME versions Eric Wong
2016-05-19 21:28   ` [PATCH 5/6] www: validate and check filenames in URLs Eric Wong
2016-05-19 21:28   ` [PATCH 6/6] view: reduce clutter for attachments w/o description Eric Wong
2016-05-19 22:06     ` [PATCH 8/7] www: tighten up allowable filenames for attachments Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160519212847.4822-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).