* [PATCH] atom: switch to getline/close for response bodies
@ 2016-12-03 1:50 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2016-12-03 1:50 UTC (permalink / raw)
To: meta
This will let us stream larger Atom document bodies without
wasting too much memory and reduce the number of round-trip
requests needed to get necessary information.
Hopefully clients are using streaming (SAX) parsers, too.
This is the final transition in the core public-inbox
code to allow migrating to a "pull"-based body streaming
scheme which allows an HTTP server to respond appropriately
to backpressure from slow clients.
---
MANIFEST | 1 +
lib/PublicInbox/Feed.pm | 188 +++++++--------------------------------
lib/PublicInbox/SearchView.pm | 29 +++---
lib/PublicInbox/WwwAtomStream.pm | 134 ++++++++++++++++++++++++++++
t/common.perl | 21 ++---
5 files changed, 186 insertions(+), 187 deletions(-)
create mode 100644 lib/PublicInbox/WwwAtomStream.pm
diff --git a/MANIFEST b/MANIFEST
index 3a4d7c4..3388b1a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -88,6 +88,7 @@ lib/PublicInbox/View.pm
lib/PublicInbox/WWW.pm
lib/PublicInbox/WWW.pod
lib/PublicInbox/WatchMaildir.pm
+lib/PublicInbox/WwwAtomStream.pm
lib/PublicInbox/WwwAttach.pm
lib/PublicInbox/WwwStream.pm
lib/PublicInbox/WwwText.pm
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 25fec10..31d82ad 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -6,26 +6,45 @@ package PublicInbox::Feed;
use strict;
use warnings;
use Email::MIME;
-use Date::Parse qw(strptime);
-use PublicInbox::Hval qw/ascii_html/;
use PublicInbox::View;
-use PublicInbox::MID qw/mid_clean mid2path/;
-use PublicInbox::Address;
-use POSIX qw/strftime/;
+use PublicInbox::WwwAtomStream;
use constant {
- DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # Atom standard
MAX_PER_PAGE => 25, # this needs to be tunable
};
# main function
sub generate {
my ($ctx) = @_;
- sub { emit_atom($_[0], $ctx) };
+ my @paths;
+ each_recent_blob($ctx, sub { push @paths, $_[0] });
+ return _no_thread() unless @paths;
+
+ my $ibx = $ctx->{-inbox};
+ PublicInbox::WwwAtomStream->response($ctx, 200, sub {
+ while (my $path = shift @paths) {
+ my $mime = do_cat_mail($ibx, $path) or next;
+ return $mime;
+ }
+ });
}
sub generate_thread_atom {
my ($ctx) = @_;
- sub { emit_atom_thread($_[0], $ctx) };
+ my $mid = $ctx->{mid};
+ my $res = $ctx->{srch}->get_thread($mid);
+ return _no_thread() unless $res->{total};
+
+ my $ibx = $ctx->{-inbox};
+ my $html_url = $ibx->base_url($ctx->{env});
+ $html_url .= PublicInbox::Hval->new_msgid($mid)->{href};
+ $ctx->{-html_url} = $html_url;
+ my $msgs = $res->{msgs};
+ PublicInbox::WwwAtomStream->response($ctx, 200, sub {
+ while (my $msg = shift @$msgs) {
+ $msg = $ibx->msg_by_smsg($msg) and
+ return Email::MIME->new($msg);
+ }
+ });
}
sub generate_html_index {
@@ -73,80 +92,8 @@ sub new_html {
# private subs
-sub title_tag {
- my ($title) = @_;
- $title =~ tr/\t\n / /s; # squeeze spaces
- # try to avoid the type attribute in title:
- $title = ascii_html($title);
- my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
- "<title$type>$title</title>";
-}
-
-sub atom_header {
- my ($feed_opts, $title) = @_;
-
- $title = title_tag($feed_opts->{description}) unless (defined $title);
-
- qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
- qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
- qq{$title} .
- qq(<link\nrel="alternate"\ntype="text/html") .
- qq(\nhref="$feed_opts->{url}"/>) .
- qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) .
- qq(<id>mailto:$feed_opts->{id_addr}</id>);
-}
-
-sub emit_atom {
- my ($cb, $ctx) = @_;
- my $feed_opts = get_feedopts($ctx);
- my $fh = $cb->([ 200, ['Content-Type' => 'application/atom+xml']]);
- my $max = $ctx->{max} || MAX_PER_PAGE;
- my $x = atom_header($feed_opts);
- my $ibx = $ctx->{-inbox};
- each_recent_blob($ctx, sub {
- my ($path, undef, $ts) = @_;
- if (defined $x) {
- $fh->write($x . feed_updated(undef, $ts));
- $x = undef;
- }
- my $s = feed_entry($feed_opts, $path, $ibx) or return 0;
- $fh->write($s);
- 1;
- });
- end_feed($fh);
-}
-
-sub _no_thread {
- my ($cb) = @_;
- $cb->([404, ['Content-Type', 'text/plain'],
- ["No feed found for thread\n"]]);
-}
-
-sub end_feed {
- my ($fh) = @_;
- $fh->write('</feed>');
- $fh->close;
-}
-
-sub emit_atom_thread {
- my ($cb, $ctx) = @_;
- my $mid = $ctx->{mid};
- my $res = $ctx->{srch}->get_thread($mid);
- return _no_thread($cb) unless $res->{total};
- my $feed_opts = get_feedopts($ctx);
- my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]);
- my $ibx = $ctx->{-inbox};
- my $html_url = $ibx->base_url($ctx->{env});
- $html_url .= PublicInbox::Hval->new_msgid($mid)->{href};
-
- $feed_opts->{url} = $html_url;
- $feed_opts->{emit_header} = 1;
-
- foreach my $msg (@{$res->{msgs}}) {
- my $s = feed_entry($feed_opts, mid2path($msg->mid), $ibx);
- $fh->write($s) if defined $s;
- }
- end_feed($fh);
+sub _no_thread () {
+ [404, ['Content-Type', 'text/plain'], ["No feed found for thread\n"]];
}
sub new_html_footer {
@@ -199,7 +146,7 @@ sub each_recent_blob {
if ($line =~ /$addmsg/o) {
my $add = $1;
next if $deleted{$add}; # optimization-only
- $nr += $cb->($add, $cur_commit, $ts, $u, $subj);
+ $cb->($add, $cur_commit, $ts, $u, $subj) and $nr++;
if ($nr >= $max) {
$last = 1;
last;
@@ -228,81 +175,6 @@ sub each_recent_blob {
($first_commit, $last_commit);
}
-# private functions below
-sub get_feedopts {
- my ($ctx) = @_;
- my $inbox = $ctx->{inbox};
- my $obj = $ctx->{-inbox};
- my %rv = ( description => $obj->description );
-
- $rv{address} = $obj->{address};
- $rv{id_addr} = $obj->{-primary_address};
- my $url_base = $obj->base_url($ctx->{env});
- if (my $mid = $ctx->{mid}) { # per-thread feed:
- $rv{atomurl} = "$url_base$mid/t.atom";
- } else {
- $rv{atomurl} = $url_base."new.atom";
- }
- $rv{url} ||= $url_base;
- $rv{midurl} = $url_base;
-
- \%rv;
-}
-
-sub feed_updated {
- my ($date, $ts) = @_;
- my @t = eval { strptime($date) } if defined $date;
- @t = gmtime($ts || time) unless scalar @t;
-
- '<updated>' . strftime(DATEFMT, @t) . '</updated>';
-}
-
-# returns undef or string
-sub feed_entry {
- my ($feed_opts, $add, $ibx) = @_;
-
- my $mime = do_cat_mail($ibx, $add) or return;
- my $url = $feed_opts->{url};
- my $midurl = $feed_opts->{midurl};
-
- my $header_obj = $mime->header_obj;
- my $mid = mid_clean($header_obj->header_raw('Message-ID'));
- $mid = PublicInbox::Hval->new_msgid($mid);
- my $href = $midurl . $mid->{href}. '/';
-
- my $date = $header_obj->header('Date');
- my $updated = feed_updated($date);
-
- my $title = $header_obj->header('Subject');
- defined $title or return;
- $title = title_tag($title);
-
- my $from = $header_obj->header('From') or return;
- my ($email) = PublicInbox::Address::emails($from);
- my $name = join(', ',PublicInbox::Address::names($from));
- $name = ascii_html($name);
- $email = ascii_html($email);
-
- my $s = '';
- if (delete $feed_opts->{emit_header}) {
- $s .= atom_header($feed_opts, $title) . $updated;
- }
- $s .= "<entry><author><name>$name</name><email>$email</email>" .
- "</author>$title$updated" .
- qq{<content\ntype="xhtml">} .
- qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
- qq(<pre\nstyle="white-space:pre-wrap">) .
- PublicInbox::View::multipart_text_as_html($mime, $href) .
- '</pre>';
-
- $add =~ tr!/!!d;
- my $h = '[a-f0-9]';
- my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
- my $id = 'urn:uuid:' . join('-', @uuid5);
- $s .= qq!</div></content><link\nhref="$href"/>!.
- "<id>$id</id></entry>";
-}
-
sub do_cat_mail {
my ($ibx, $path) = @_;
my $mime = eval { $ibx->msg_by_path($path) } or return;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index fbba9c4..6af151a 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -8,6 +8,7 @@ use warnings;
use PublicInbox::SearchMsg;
use PublicInbox::Hval qw/ascii_html/;
use PublicInbox::View;
+use PublicInbox::WwwAtomStream;
use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
use Email::MIME;
require PublicInbox::Git;
@@ -46,7 +47,7 @@ sub sres_top_html {
$cb = *noop;
} else {
my $x = $q->{x};
- return sub { adump($_[0], $mset, $q, $ctx) } if ($x eq 'A');
+ return adump($_[0], $mset, $q, $ctx) if $x eq 'A';
$ctx->{-html_tip} = search_nav_top($mset, $q) . "\n\n";
if ($x eq 't') {
@@ -213,23 +214,17 @@ sub ctx_prepare {
sub adump {
my ($cb, $mset, $q, $ctx) = @_;
- my $fh = $cb->([ 200, ['Content-Type' => 'application/atom+xml']]);
my $ibx = $ctx->{-inbox};
- my $feed_opts = PublicInbox::Feed::get_feedopts($ctx);
- my $x = ascii_html($q->{'q'});
- $x = qq{$x - search results};
- $feed_opts->{atomurl} = $feed_opts->{url} . '?'. $q->qs_html;
- $feed_opts->{url} .= '?'. $q->qs_html(x => undef);
- $x = PublicInbox::Feed::atom_header($feed_opts, "<title>$x</title>");
- $fh->write($x. PublicInbox::Feed::feed_updated());
-
- for ($mset->items) {
- $x = PublicInbox::SearchMsg->load_doc($_->get_document)->mid;
- $x = mid2path($x);
- my $s = PublicInbox::Feed::feed_entry($feed_opts, $x, $ibx);
- $fh->write($s) if defined $s;
- }
- PublicInbox::Feed::end_feed($fh);
+ my @items = $mset->items;
+ $ctx->{search_query} = $q;
+ PublicInbox::WwwAtomStream->response($ctx, 200, sub {
+ while (my $x = shift @items) {
+ $x = PublicInbox::SearchMsg->load_doc($x->get_document);
+ $x = $ibx->msg_by_smsg($x) and
+ return Email::MIME->new($x);
+ }
+ return undef;
+ });
}
package PublicInbox::SearchQuery;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
new file mode 100644
index 0000000..5720384
--- /dev/null
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -0,0 +1,134 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Atom body stream which provides getline+close methods
+package PublicInbox::WwwAtomStream;
+use strict;
+use warnings;
+
+# FIXME: locale-independence:
+use POSIX qw(strftime);
+use Date::Parse qw(strptime);
+
+use PublicInbox::Address;
+use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::MID qw/mid_clean mid2path mid_escape/;
+
+# called by PSGI server after getline:
+sub close {}
+
+sub new {
+ my ($class, $ctx, $cb) = @_;
+ $ctx->{emit_header} = 1;
+ $ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
+ bless { cb => $cb || *close, ctx => $ctx }, $class;
+}
+
+sub response {
+ my ($class, $ctx, $code, $cb) = @_;
+ [ $code, [ 'Content-Type', 'application/atom+xml' ],
+ $class->new($ctx, $cb) ]
+}
+
+# called once for each message by PSGI server
+sub getline {
+ my ($self) = @_;
+ if (my $middle = $self->{cb}) {
+ my $mime = $middle->();
+ return feed_entry($self, $mime) if $mime;
+ }
+ delete $self->{cb} ? '</feed>' : undef;
+}
+
+# private
+
+sub title_tag {
+ my ($title) = @_;
+ $title =~ tr/\t\n / /s; # squeeze spaces
+ # try to avoid the type attribute in title:
+ $title = ascii_html($title);
+ my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+ "<title$type>$title</title>";
+}
+
+sub atom_header {
+ my ($ctx, $title) = @_;
+ my $ibx = $ctx->{-inbox};
+ my $base_url = $ctx->{feed_base_url};
+ my $search_q = $ctx->{search_query};
+ my $self_url = $base_url;
+ my $mid = $ctx->{mid};
+ if (defined $mid) { # per-thread
+ $self_url .= mid_escape($mid).'/t.atom';
+ } elsif (defined $search_q) {
+ my $query = $search_q->{'q'};
+ $title = title_tag("$query - search results");
+ $base_url .= '?' . $search_q->qs_html(x => undef);
+ $self_url .= '?' . $search_q->qs_html;
+ } else {
+ $title = title_tag($ibx->description);
+ $self_url .= 'new.atom';
+ }
+ my $mtime = (stat($ibx->{mainrepo}))[9] || time;
+
+ qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+ qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
+ qq{$title} .
+ qq(<link\nrel="alternate"\ntype="text/html") .
+ qq(\nhref="$base_url"/>) .
+ qq(<link\nrel="self"\nhref="$self_url"/>) .
+ qq(<id>mailto:$ibx->{-primary_address}</id>) .
+ feed_updated(gmtime($mtime));
+}
+
+# returns undef or string
+sub feed_entry {
+ my ($self, $mime) = @_;
+ my $ctx = $self->{ctx};
+ my $hdr = $mime->header_obj;
+ my $mid = mid_clean($hdr->header_raw('Message-ID'));
+
+ my $uuid = mid2path($mid);
+ $uuid =~ tr!/!!d;
+ my $h = '[a-f0-9]';
+ my (@uuid5) = ($uuid =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
+ $uuid = 'urn:uuid:' . join('-', @uuid5);
+
+ $mid = PublicInbox::Hval->new_msgid($mid);
+ my $href = $ctx->{feed_base_url} . $mid->{href}. '/';
+
+ my $date = $hdr->header('Date');
+ my @t = eval { strptime($date) } if defined $date;
+ @t = gmtime(time) unless scalar @t;
+ my $updated = feed_updated(@t);
+
+ my $title = $hdr->header('Subject');
+ $title = '(no subject)' unless defined $title && $title ne '';
+ $title = title_tag($title);
+
+ my $from = $hdr->header('From') or return;
+ my ($email) = PublicInbox::Address::emails($from);
+ my $name = join(', ',PublicInbox::Address::names($from));
+ $name = ascii_html($name);
+ $email = ascii_html($email);
+
+ my $s = '';
+ if (delete $ctx->{emit_header}) {
+ $s .= atom_header($ctx, $title);
+ }
+ $s .= "<entry><author><name>$name</name><email>$email</email>" .
+ "</author>$title$updated" .
+ qq{<content\ntype="xhtml">} .
+ qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} .
+ qq(<pre\nstyle="white-space:pre-wrap">) .
+ PublicInbox::View::multipart_text_as_html($mime, $href) .
+ '</pre>' .
+ qq!</div></content><link\nhref="$href"/>!.
+ "<id>$uuid</id></entry>";
+}
+
+sub feed_updated {
+ '<updated>' . strftime('%Y-%m-%dT%H:%M:%SZ', @_) . '</updated>';
+}
+
+1;
diff --git a/t/common.perl b/t/common.perl
index bec5769..1251333 100644
--- a/t/common.perl
+++ b/t/common.perl
@@ -1,18 +1,15 @@
# Copyright (C) 2015 all contributors <meta@public-inbox.org>
# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-require IO::File;
-use POSIX qw/dup/;
sub stream_to_string {
- my ($cb) = @_;
- my $headers;
- my $io = IO::File->new_tmpfile;
- my $dup = dup($io->fileno);
- my $response = sub { $headers = \@_, $io };
- $cb->($response);
- $io = IO::File->new;
- $io->fdopen($dup, 'r+');
- $io->seek(0, 0);
- $io->read(my $str, ($io->stat)[7]);
+ my ($res) = @_;
+ my $body = $res->[2];
+ my $str = '';
+ while (defined(my $chunk = $body->getline)) {
+ $str .= $chunk;
+ }
+ $body->close;
$str;
}
+
+1;
--
EW
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2016-12-03 1:50 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-03 1:50 [PATCH] atom: switch to getline/close for response bodies Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).