unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/6] bunch of cleanups and a new feature!
@ 2016-07-09  3:18 Eric Wong
  2016-07-09  3:18 ` [PATCH 1/6] www: drop unused constants Eric Wong
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

The final patch seems to be what's needed to maintain fairness
for small inboxes while serving large ones.

Eric Wong (6):
      www: drop unused constants
      www: cleanup parameter passing
      feed: remove dead code and unneeded use
      cleanup some unnecessary use/requires
      qspawn: allow configurable limiters
      www: add configurable limiters

 MANIFEST                          |  1 +
 lib/PublicInbox/Config.pm         | 13 +++++++-
 lib/PublicInbox/ExtMsg.pm         |  4 +--
 lib/PublicInbox/Feed.pm           | 23 -------------
 lib/PublicInbox/GitHTTPBackend.pm | 15 +++++++--
 lib/PublicInbox/Inbox.pm          | 25 +++++++++++++-
 lib/PublicInbox/Mbox.pm           |  1 -
 lib/PublicInbox/Qspawn.pm         | 39 ++++++++++++++++------
 lib/PublicInbox/View.pm           |  2 --
 lib/PublicInbox/WWW.pm            | 70 ++++++++++++++++++---------------------
 t/config.t                        |  2 ++
 t/config_limiter.t                | 50 ++++++++++++++++++++++++++++
 t/qspawn.t                        | 10 +++---
 13 files changed, 171 insertions(+), 84 deletions(-)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] www: drop unused constants
  2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
@ 2016-07-09  3:18 ` Eric Wong
  2016-07-09  3:18 ` [PATCH 2/6] www: cleanup parameter passing Eric Wong
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

We no longer generate our footer, here.  We are not currently
advertising ssoma, here.
---
 lib/PublicInbox/WWW.pm | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2c60d59..b602206 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -16,8 +16,6 @@ use warnings;
 use PublicInbox::Config;
 use PublicInbox::Hval;
 use URI::Escape qw(uri_escape_utf8 uri_unescape);
-use constant SSOMA_URL => '//ssoma.public-inbox.org/';
-use constant PI_URL => '//public-inbox.org/';
 require PublicInbox::Git;
 use PublicInbox::GitHTTPBackend;
 our $INBOX_RE = qr!\A/([\w\.\-]+)!;
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] www: cleanup parameter passing
  2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
  2016-07-09  3:18 ` [PATCH 1/6] www: drop unused constants Eric Wong
@ 2016-07-09  3:18 ` Eric Wong
  2016-07-09  3:18 ` [PATCH 3/6] feed: remove dead code and unneeded use Eric Wong
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

Reduce the size of hashes a bit and drops some unneeded hash
lookups for uncommon paths.
---
 lib/PublicInbox/ExtMsg.pm |  4 +--
 lib/PublicInbox/Feed.pm   |  1 -
 lib/PublicInbox/WWW.pm    | 68 +++++++++++++++++++++++------------------------
 3 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 019d50a..2a9316b 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -23,16 +23,14 @@ our @EXT_URL = (
 
 sub ext_msg {
 	my ($ctx) = @_;
-	my $pi_config = $ctx->{pi_config};
 	my $cur = $ctx->{-inbox};
 	my $mid = $ctx->{mid};
-	my $env = $ctx->{env};
 
 	eval { require PublicInbox::Search };
 	my $have_xap = $@ ? 0 : 1;
 	my (@nox, @ibx, @found);
 
-	$pi_config->each_inbox(sub {
+	$ctx->{www}->{pi_config}->each_inbox(sub {
 		my ($other) = @_;
 		return if $other->{name} eq $cur->{name} || !$other->base_url;
 
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 65adf37..3b0ae42 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -253,7 +253,6 @@ sub each_recent_blob {
 # private functions below
 sub get_feedopts {
 	my ($ctx) = @_;
-	my $pi_config = $ctx->{pi_config};
 	my $inbox = $ctx->{inbox};
 	my $obj = $ctx->{-inbox};
 	my %rv = ( description => $obj->description );
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index b602206..26cd571 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -37,7 +37,7 @@ sub run {
 
 sub call {
 	my ($self, $env) = @_;
-	my $ctx = { env => $env, www => $self, pi_config => $self->{pi_config} };
+	my $ctx = { env => $env, www => $self };
 
 	# we don't care about multi-value
 	my %qp = map {
@@ -54,8 +54,7 @@ sub call {
 	if ($method eq 'POST' &&
 		 $path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
 		my $path = $2;
-		return (invalid_inbox($self, $ctx, $1) ||
-			serve_git($env, $ctx->{git}, $path));
+		return invalid_inbox($ctx, $1) || serve_git($ctx, $path);
 	}
 	elsif ($method !~ /\AGET|HEAD\z/) {
 		return r(405, 'Method Not Allowed');
@@ -65,27 +64,25 @@ sub call {
 	if ($path_info eq '/') {
 		r404();
 	} elsif ($path_info =~ m!$INBOX_RE\z!o) {
-		invalid_inbox($self, $ctx, $1) || r301($ctx, $1);
+		invalid_inbox($ctx, $1) || r301($ctx, $1);
 	} elsif ($path_info =~ m!$INBOX_RE(?:/|/index\.html)?\z!o) {
-		invalid_inbox($self, $ctx, $1) || get_index($ctx);
+		invalid_inbox($ctx, $1) || get_index($ctx);
 	} elsif ($path_info =~ m!$INBOX_RE/(?:atom\.xml|new\.atom)\z!o) {
-		invalid_inbox($self, $ctx, $1) || get_atom($ctx);
+		invalid_inbox($ctx, $1) || get_atom($ctx);
 	} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
-		invalid_inbox($self, $ctx, $1) || get_new($ctx);
+		invalid_inbox($ctx, $1) || get_new($ctx);
 	} elsif ($path_info =~ m!$INBOX_RE/
 				($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
 		my $path = $2;
-		invalid_inbox($self, $ctx, $1) ||
-			serve_git($env, $ctx->{git}, $path);
+		invalid_inbox($ctx, $1) || serve_git($ctx, $path);
 	} elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) {
-		serve_mbox_range($self, $ctx, $1, $2);
+		serve_mbox_range($ctx, $1, $2);
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
-		msg_page($self, $ctx, $1, $2, $3);
+		msg_page($ctx, $1, $2, $3);
 
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$ATTACH_RE\z!o) {
 		my ($idx, $fn) = ($3, $4);
-		invalid_inbox_mid($self, $ctx, $1, $2) ||
-			get_attach($ctx, $idx, $fn);
+		invalid_inbox_mid($ctx, $1, $2) || get_attach($ctx, $idx, $fn);
 	# in case people leave off the trailing slash:
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/(T|t)\z!o) {
 		my ($inbox, $mid, $suffix) = ($1, $2, $3);
@@ -104,7 +101,7 @@ sub call {
 		r301($ctx, $1, $2);
 
 	} else {
-		legacy_redirects($self, $ctx, $path_info);
+		legacy_redirects($ctx, $path_info);
 	}
 }
 
@@ -140,9 +137,10 @@ sub r404 {
 sub r { [ $_[0], ['Content-Type' => 'text/plain'], [ join(' ', @_, "\n") ] ] }
 
 # returns undef if valid, array ref response if invalid
-sub invalid_inbox {
-	my ($self, $ctx, $inbox) = @_;
-	my $obj = $ctx->{pi_config}->lookup_name($inbox);
+sub invalid_inbox ($$) {
+	my ($ctx, $inbox) = @_;
+	my $www = $ctx->{www};
+	my $obj = $www->{pi_config}->lookup_name($inbox);
 	if (defined $obj) {
 		$ctx->{git_dir} = $obj->{mainrepo};
 		$ctx->{git} = $obj->git;
@@ -155,13 +153,13 @@ sub invalid_inbox {
 	# generation and link things intended for nntp:// to https?://,
 	# so try to infer links and redirect them to the appropriate
 	# list URL.
-	$self->news_www->call($ctx->{env});
+	$www->news_www->call($ctx->{env});
 }
 
 # returns undef if valid, array ref response if invalid
 sub invalid_inbox_mid {
-	my ($self, $ctx, $inbox, $mid) = @_;
-	my $ret = invalid_inbox($self, $ctx, $inbox);
+	my ($ctx, $inbox, $mid) = @_;
+	my $ret = invalid_inbox($ctx, $inbox);
 	return $ret if $ret;
 
 	$ctx->{mid} = $mid = uri_unescape($mid);
@@ -195,7 +193,7 @@ sub get_new {
 sub get_index {
 	my ($ctx) = @_;
 	require PublicInbox::Feed;
-	my $srch = searcher($ctx);
+	searcher($ctx);
 	if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
 		require PublicInbox::SearchView;
 		PublicInbox::SearchView::sres_top_html($ctx);
@@ -288,7 +286,7 @@ sub get_thread_atom {
 }
 
 sub legacy_redirects {
-	my ($self, $ctx, $path_info) = @_;
+	my ($ctx, $path_info) = @_;
 
 	# single-message pages
 	if ($path_info =~ m!$INBOX_RE/m/(\S+)/\z!o) {
@@ -333,7 +331,7 @@ sub legacy_redirects {
 	# some Message-IDs have slashes in them and the HTTP server
 	# may try to be clever and unescape them :<
 	} elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/$END_RE\z!o) {
-		msg_page($self, $ctx, $1, $2, $3);
+		msg_page($ctx, $1, $2, $3);
 
 	# in case people leave off the trailing slash:
 	} elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/(T|t)\z!o) {
@@ -341,7 +339,7 @@ sub legacy_redirects {
 	} elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) {
 		r301($ctx, $1, $2);
 	} else {
-		$self->news_www->call($ctx->{env});
+		$ctx->{www}->news_www->call($ctx->{env});
 	}
 }
 
@@ -349,7 +347,7 @@ sub r301 {
 	my ($ctx, $inbox, $mid, $suffix) = @_;
 	my $obj = $ctx->{-inbox};
 	unless ($obj) {
-		my $r404 = invalid_inbox($ctx->{www}, $ctx, $inbox);
+		my $r404 = invalid_inbox($ctx, $inbox);
 		return $r404 if $r404;
 		$obj = $ctx->{-inbox};
 	}
@@ -365,9 +363,9 @@ sub r301 {
 }
 
 sub msg_page {
-	my ($self, $ctx, $inbox, $mid, $e) = @_;
+	my ($ctx, $inbox, $mid, $e) = @_;
 	my $ret;
-	$ret = invalid_inbox_mid($self, $ctx, $inbox, $mid) and return $ret;
+	$ret = invalid_inbox_mid($ctx, $inbox, $mid) and return $ret;
 	'' eq $e and return get_mid_html($ctx);
 	'T/' eq $e and return get_thread($ctx, 1);
 	't/' eq $e and return get_thread($ctx);
@@ -382,13 +380,13 @@ sub msg_page {
 }
 
 sub serve_git {
-	my ($env, $git, $path) = @_;
-	PublicInbox::GitHTTPBackend::serve($env, $git, $path);
+	my ($ctx, $path) = @_;
+	PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path);
 }
 
 sub serve_mbox_range {
-	my ($self, $ctx, $inbox, $range) = @_;
-	invalid_inbox($self, $ctx, $inbox) || eval {
+	my ($ctx, $inbox, $range) = @_;
+	invalid_inbox($ctx, $inbox) || eval {
 		require PublicInbox::Mbox;
 		searcher($ctx);
 		PublicInbox::Mbox::emit_range($ctx, $range);
@@ -397,10 +395,10 @@ sub serve_mbox_range {
 
 sub news_www {
 	my ($self) = @_;
-	my $nw = $self->{news_www};
-	return $nw if $nw;
-	require PublicInbox::NewsWWW;
-	$self->{news_www} = PublicInbox::NewsWWW->new($self->{pi_config});
+	$self->{news_www} ||= do {
+		require PublicInbox::NewsWWW;
+		PublicInbox::NewsWWW->new($self->{pi_config});
+	}
 }
 
 sub get_attach {
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] feed: remove dead code and unneeded use
  2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
  2016-07-09  3:18 ` [PATCH 1/6] www: drop unused constants Eric Wong
  2016-07-09  3:18 ` [PATCH 2/6] www: cleanup parameter passing Eric Wong
@ 2016-07-09  3:18 ` Eric Wong
  2016-07-09  3:18 ` [PATCH 4/6] cleanup some unnecessary use/requires Eric Wong
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

We've cleaned up our code in recent days and WwwStream
provides a consistent header for our HTML pages.
---
 lib/PublicInbox/Feed.pm | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 3b0ae42..a697a43 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,7 +8,6 @@ use warnings;
 use Email::MIME;
 use Date::Parse qw(strptime);
 use PublicInbox::Hval qw/ascii_html/;
-use PublicInbox::Git;
 use PublicInbox::View;
 use PublicInbox::MID qw/mid_clean mid2path/;
 use PublicInbox::Address;
@@ -150,27 +149,6 @@ sub emit_atom_thread {
 	end_feed($fh);
 }
 
-sub _html_index_top {
-	my ($feed_opts, $srch) = @_;
-
-	my $title = ascii_html($feed_opts->{description} || '');
-	my $top = "<b>$title</b> (<a\nhref=\"new.atom\">Atom feed</a>)";
-	if ($srch) {
-		$top = qq{<form\naction=""><pre>$top} .
-			  qq{ <input\nname=q\ntype=text />} .
-			  qq{<input\ntype=submit\nvalue=search />} .
-			  q{</pre></form><pre>}
-	} else {
-		$top = '<pre>' . $top . "\n";
-	}
-
-	"<html><head><title>$title</title>" .
-		"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
-		"href=\"new.atom\"\ntype=\"application/atom+xml\"/>" .
-		PublicInbox::Hval::STYLE .
-		"</head><body>$top";
-}
-
 sub new_html_footer {
 	my ($ctx, $last) = @_;
 	my $qp = delete $ctx->{qp} or return;
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] cleanup some unnecessary use/requires
  2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
                   ` (2 preceding siblings ...)
  2016-07-09  3:18 ` [PATCH 3/6] feed: remove dead code and unneeded use Eric Wong
@ 2016-07-09  3:18 ` Eric Wong
  2016-07-09  3:18 ` [PATCH 5/6] qspawn: allow configurable limiters Eric Wong
  2016-07-09  3:18 ` [PATCH 6/6] www: add " Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

Hopefully this can reduce memory overhead for people that
use one-shot CGI.
---
 lib/PublicInbox/GitHTTPBackend.pm | 1 +
 lib/PublicInbox/Mbox.pm           | 1 -
 lib/PublicInbox/View.pm           | 2 --
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index b684988..ebb0850 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -10,6 +10,7 @@ use Fcntl qw(:seek);
 use IO::File;
 use HTTP::Date qw(time2str);
 use HTTP::Status qw(status_message);
+use Plack::Util;
 use PublicInbox::Qspawn;
 
 # n.b. serving "description" and "cloneurl" should be innocuous enough to
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 9dad0f6..d2c0954 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -8,7 +8,6 @@ use strict;
 use warnings;
 use PublicInbox::MID qw/mid_clean/;
 use URI::Escape qw/uri_escape_utf8/;
-use Plack::Util;
 require Email::Simple;
 
 sub emit1 {
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 054b358..275fed4 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -8,8 +8,6 @@ use strict;
 use warnings;
 use URI::Escape qw/uri_escape_utf8/;
 use Date::Parse qw/str2time/;
-use Encode::MIME::Header;
-use Plack::Util;
 use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::Linkify;
 use PublicInbox::MID qw/mid_clean id_compress mid_mime/;
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] qspawn: allow configurable limiters
  2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
                   ` (3 preceding siblings ...)
  2016-07-09  3:18 ` [PATCH 4/6] cleanup some unnecessary use/requires Eric Wong
@ 2016-07-09  3:18 ` Eric Wong
  2016-07-09  3:18 ` [PATCH 6/6] www: add " Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

And bump the default limit to 32 so we match git-daemon
behavior.  This shall allow us to configure different levels
of concurrency for different repositories and prevent clones
of giant repos from stalling service to small repos.
---
 lib/PublicInbox/GitHTTPBackend.pm |  6 +++++-
 lib/PublicInbox/Qspawn.pm         | 38 ++++++++++++++++++++++++++++----------
 t/qspawn.t                        | 10 ++++++----
 3 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index ebb0850..ed8fdf0 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -13,6 +13,9 @@ use HTTP::Status qw(status_message);
 use Plack::Util;
 use PublicInbox::Qspawn;
 
+# 32 is same as the git-daemon connection limit
+my $default_limiter = PublicInbox::Qspawn::Limiter->new(32);
+
 # n.b. serving "description" and "cloneurl" should be innocuous enough to
 # not cause problems.  serving "config" might...
 my @text = qw[HEAD info/refs
@@ -176,6 +179,7 @@ sub prepare_range {
 # returns undef if 403 so it falls back to dumb HTTP
 sub serve_smart {
 	my ($env, $git, $path) = @_;
+	my $limiter = $default_limiter;
 	my $in = $env->{'psgi.input'};
 	my $fd = eval { fileno($in) };
 	unless (defined $fd && $fd >= 0) {
@@ -248,7 +252,7 @@ sub serve_smart {
 		# holding the input here is a waste of FDs and memory
 		$env->{'psgi.input'} = undef;
 
-		$x->start(sub { # may run later, much later...
+		$x->start($limiter, sub { # may run later, much later...
 			($rpipe) = @_;
 			$in = undef;
 			if ($async) {
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index 9299096..cc9c340 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -1,12 +1,14 @@
 # Copyright (C) 2016 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Limits the number of processes spawned
+# This does not depend on Danga::Socket or any other external
+# scheduling mechanism, you just need to call start and finish
+# appropriately
 package PublicInbox::Qspawn;
 use strict;
 use warnings;
 use PublicInbox::Spawn qw(popen_rd);
-our $LIMIT = 1;
-my $running = 0;
-my @run_queue;
 
 sub new ($$$;) {
 	my ($class, $cmd, $env, $opt) = @_;
@@ -16,9 +18,10 @@ sub new ($$$;) {
 sub _do_spawn {
 	my ($self, $cb) = @_;
 	my $err;
+
 	($self->{rpipe}, $self->{pid}) = popen_rd(@{$self->{args}});
 	if (defined $self->{pid}) {
-		$running++;
+		$self->{limiter}->{running}++;
 	} else {
 		$self->{err} = $!;
 	}
@@ -27,26 +30,41 @@ sub _do_spawn {
 
 sub finish ($) {
 	my ($self) = @_;
+	my $limiter = $self->{limiter};
 	if (delete $self->{rpipe}) {
 		my $pid = delete $self->{pid};
 		$self->{err} = $pid == waitpid($pid, 0) ? $? :
 				"PID:$pid still running?";
-		$running--;
+		$limiter->{running}--;
 	}
-	if (my $next = shift @run_queue) {
+	if (my $next = shift @{$limiter->{run_queue}}) {
 		_do_spawn(@$next);
 	}
 	$self->{err};
 }
 
-sub start ($$) {
-	my ($self, $cb) = @_;
+sub start {
+	my ($self, $limiter, $cb) = @_;
+	$self->{limiter} = $limiter;
 
-	if ($running < $LIMIT) {
+	if ($limiter->{running} < $limiter->{limit}) {
 		_do_spawn($self, $cb);
 	} else {
-		push @run_queue, [ $self, $cb ];
+		push @{$limiter->{run_queue}}, [ $self, $cb ];
 	}
 }
 
+package PublicInbox::Qspawn::Limiter;
+use strict;
+use warnings;
+
+sub new {
+	my ($class, $limit) = @_;
+	bless {
+		limit => $limit || 1,
+		running => 0,
+		run_queue => [],
+	}, $class;
+}
+
 1;
diff --git a/t/qspawn.t b/t/qspawn.t
index 05072e2..9c42e10 100644
--- a/t/qspawn.t
+++ b/t/qspawn.t
@@ -2,10 +2,12 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use Test::More;
 use_ok 'PublicInbox::Qspawn';
+
+my $limiter = PublicInbox::Qspawn::Limiter->new(1);
 {
 	my $x = PublicInbox::Qspawn->new([qw(true)]);
 	my $run = 0;
-	$x->start(sub {
+	$x->start($limiter, sub {
 		my ($rpipe) = @_;
 		is(0, sysread($rpipe, my $buf, 1), 'read zero bytes');
 		ok(!$x->finish, 'no error on finish');
@@ -17,7 +19,7 @@ use_ok 'PublicInbox::Qspawn';
 {
 	my $x = PublicInbox::Qspawn->new([qw(false)]);
 	my $run = 0;
-	$x->start(sub {
+	$x->start($limiter, sub {
 		my ($rpipe) = @_;
 		is(0, sysread($rpipe, my $buf, 1), 'read zero bytes from false');
 		my $err = $x->finish;
@@ -30,7 +32,7 @@ use_ok 'PublicInbox::Qspawn';
 foreach my $cmd ([qw(sleep 1)], [qw(sh -c), 'sleep 1; false']) {
 	my $s = PublicInbox::Qspawn->new($cmd);
 	my @run;
-	$s->start(sub {
+	$s->start($limiter, sub {
 		my ($rpipe) = @_;
 		push @run, 'sleep';
 		is(0, sysread($rpipe, my $buf, 1), 'read zero bytes');
@@ -39,7 +41,7 @@ foreach my $cmd ([qw(sleep 1)], [qw(sh -c), 'sleep 1; false']) {
 	my @t = map {
 		my $i = $n++;
 		my $x = PublicInbox::Qspawn->new([qw(true)]);
-		$x->start(sub {
+		$x->start($limiter, sub {
 			my ($rpipe) = @_;
 			push @run, $i;
 		});
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] www: add configurable limiters
  2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
                   ` (4 preceding siblings ...)
  2016-07-09  3:18 ` [PATCH 5/6] qspawn: allow configurable limiters Eric Wong
@ 2016-07-09  3:18 ` Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2016-07-09  3:18 UTC (permalink / raw)
  To: meta

Currently only for git-http-backend use, this allows limiting
the number of spawned processes per-inbox or by group, if there
are multiple large inboxes amidst a sea of small ones.

For example, a "big" repo limiter could be used for big inboxes:
which would be shared between multiple repos:

[limiter "big"]
	max = 4
[publicinbox "git"]
	address = git@vger.kernel.org
	mainrepo = /path/to/git.git
	; shared limiter with giant:
	httpbackendmax = big
[publicinbox "giant"]
	address = giant@project.org
	mainrepo = /path/to/giant.git
	; shared limiter with git:
	httpbackendmax = big

; This is a tiny inbox, use the default limiter with 32 slots:
[publicinbox "meta"]
	address = meta@public-inbox.org
	mainrepo = /path/to/meta.git
---
 MANIFEST                          |  1 +
 lib/PublicInbox/Config.pm         | 13 +++++++++-
 lib/PublicInbox/GitHTTPBackend.pm | 10 ++++++--
 lib/PublicInbox/Inbox.pm          | 25 +++++++++++++++++++-
 lib/PublicInbox/Qspawn.pm         |  7 +++---
 t/config.t                        |  2 ++
 t/config_limiter.t                | 50 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 101 insertions(+), 7 deletions(-)
 create mode 100644 t/config_limiter.t

diff --git a/MANIFEST b/MANIFEST
index ceb1a9d..75bb43e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -106,6 +106,7 @@ t/cgi.t
 t/check-www-inbox.perl
 t/common.perl
 t/config.t
+t/config_limiter.t
 t/emergency.t
 t/fail-bin/spamc
 t/feed.t
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index d34d11a..d7eaa3e 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -20,6 +20,7 @@ sub new {
 	$self->{-by_addr} ||= {};
 	$self->{-by_name} ||= {};
 	$self->{-by_newsgroup} ||= {};
+	$self->{-limiters} ||= {};
 	$self;
 }
 
@@ -85,6 +86,15 @@ sub lookup_newsgroup {
 	undef;
 }
 
+sub limiter {
+	my ($self, $name) = @_;
+	$self->{-limiters}->{$name} ||= do {
+		require PublicInbox::Qspawn;
+		my $key = "limiter.$name.max";
+		PublicInbox::Qspawn::Limiter->new($self->{$key});
+	};
+}
+
 sub get {
 	my ($self, $inbox, $key) = @_;
 
@@ -131,7 +141,7 @@ sub _fill {
 	my $rv = {};
 
 	foreach my $k (qw(mainrepo address filter url newsgroup
-			watch watchheader)) {
+			watch watchheader httpbackendmax)) {
 		my $v = $self->{"$pfx.$k"};
 		$rv->{$k} = $v if defined $v;
 	}
@@ -139,6 +149,7 @@ sub _fill {
 	my $name = $pfx;
 	$name =~ s/\Apublicinbox\.//;
 	$rv->{name} = $name;
+	$rv->{-pi_config} = $self;
 	$rv = PublicInbox::Inbox->new($rv);
 	my $v = $rv->{address};
 	if (ref($v) eq 'ARRAY') {
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index ed8fdf0..d491479 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -179,7 +179,6 @@ sub prepare_range {
 # returns undef if 403 so it falls back to dumb HTTP
 sub serve_smart {
 	my ($env, $git, $path) = @_;
-	my $limiter = $default_limiter;
 	my $in = $env->{'psgi.input'};
 	my $fd = eval { fileno($in) };
 	unless (defined $fd && $fd >= 0) {
@@ -197,7 +196,14 @@ sub serve_smart {
 		my $val = $env->{$name};
 		$env{$name} = $val if defined $val;
 	}
-	my $git_dir = ref $git ? $git->{git_dir} : $git;
+	my ($git_dir, $limiter);
+	if (ref $git) {
+		$limiter = $git->{-httpbackend_limiter} || $default_limiter;
+		$git_dir = $git->{git_dir};
+	} else {
+		$limiter = $default_limiter;
+		$git_dir = $git;
+	}
 	$env{GIT_HTTP_EXPORT_ALL} = '1';
 	$env{PATH_TRANSLATED} = "$git_dir/$path";
 	my %rdr = ( 0 => fileno($in) );
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index dc9980b..23b7721 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -34,6 +34,7 @@ sub new {
 	my $v = $opts->{address} ||= 'public-inbox@example.com';
 	my $p = $opts->{-primary_address} = ref($v) eq 'ARRAY' ? $v->[0] : $v;
 	$opts->{domain} = ($p =~ /\@(\S+)\z/) ? $1 : 'localhost';
+	weaken($opts->{-pi_config});
 	bless $opts, $class;
 }
 
@@ -44,11 +45,33 @@ sub _weaken_fields {
 	}
 }
 
+sub _set_limiter ($$$) {
+	my ($self, $git, $pfx) = @_;
+	my $lkey = "-${pfx}_limiter";
+	$git->{$lkey} = $self->{$lkey} ||= eval {
+		my $mkey = $pfx.'max';
+		my $val = $self->{$mkey} or return;
+		my $lim;
+		if ($val =~ /\A\d+\z/) {
+			require PublicInbox::Qspawn;
+			$lim = PublicInbox::Qspawn::Limiter->new($val);
+		} elsif ($val =~ /\A[a-z][a-z0-9]*\z/) {
+			$lim = $self->{-pi_config}->limiter($val);
+			warn "$mkey limiter=$val not found\n" if !$lim;
+		} else {
+			warn "$mkey limiter=$val not understood\n";
+		}
+		$lim;
+	}
+}
+
 sub git {
 	my ($self) = @_;
 	$self->{git} ||= eval {
 		_weaken_later($self);
-		PublicInbox::Git->new($self->{mainrepo});
+		my $g = PublicInbox::Git->new($self->{mainrepo});
+		_set_limiter($self, $g, 'httpbackend');
+		$g;
 	};
 }
 
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index cc9c340..697c55a 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -47,7 +47,7 @@ sub start {
 	my ($self, $limiter, $cb) = @_;
 	$self->{limiter} = $limiter;
 
-	if ($limiter->{running} < $limiter->{limit}) {
+	if ($limiter->{running} < $limiter->{max}) {
 		_do_spawn($self, $cb);
 	} else {
 		push @{$limiter->{run_queue}}, [ $self, $cb ];
@@ -59,9 +59,10 @@ use strict;
 use warnings;
 
 sub new {
-	my ($class, $limit) = @_;
+	my ($class, $max) = @_;
 	bless {
-		limit => $limit || 1,
+		# 32 is same as the git-daemon connection limit
+		max => $max || 32,
 		running => 0,
 		run_queue => [],
 	}, $class;
diff --git a/t/config.t b/t/config.t
index dc448cd..77e8f4a 100644
--- a/t/config.t
+++ b/t/config.t
@@ -30,6 +30,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1);
 		'url' => 'http://example.com/meta',
 		-primary_address => 'meta@public-inbox.org',
 		'name' => 'meta',
+		-pi_config => $cfg,
 	}, "lookup matches expected output");
 
 	is($cfg->lookup('blah@example.com'), undef,
@@ -45,6 +46,7 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1);
 		'domain' => 'public-inbox.org',
 		'name' => 'test',
 		'url' => 'http://example.com/test',
+		-pi_config => $cfg,
 	}, "lookup matches expected output for test");
 }
 
diff --git a/t/config_limiter.t b/t/config_limiter.t
new file mode 100644
index 0000000..bfea151
--- /dev/null
+++ b/t/config_limiter.t
@@ -0,0 +1,50 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::Config;
+my $cfgpfx = "publicinbox.test";
+{
+	my $config = PublicInbox::Config->new({
+		"$cfgpfx.address" => 'test@example.com',
+		"$cfgpfx.mainrepo" => '/path/to/non/existent',
+		"$cfgpfx.httpbackendmax" => 12,
+	});
+	my $ibx = $config->lookup_name('test');
+	my $git = $ibx->git;
+	my $old = "$git";
+	my $lim = $git->{-httpbackend_limiter};
+	ok($lim, 'Limiter exists');
+	is($lim->{max}, 12, 'limiter has expected slots');
+	$git = undef;
+	$ibx->{git} = undef;
+	$git = $ibx->git;
+	isnt($old, "$git", 'got new Git object');
+	is("$git->{-httpbackend_limiter}", "$lim", 'same limiter');
+}
+
+{
+	my $config = PublicInbox::Config->new({
+		'limiter.named.max' => 3,
+		"$cfgpfx.address" => 'test@example.com',
+		"$cfgpfx.mainrepo" => '/path/to/non/existent',
+		"$cfgpfx.httpbackendmax" => 'named',
+	});
+	my $ibx = $config->lookup_name('test');
+	my $git = $ibx->git;
+	ok($git, 'got git object');
+	my $old = "$git";
+	my $lim = $git->{-httpbackend_limiter};
+	ok($lim, 'Limiter exists');
+	is($lim->{max}, 3, 'limiter has expected slots');
+	$git = undef;
+	$ibx->{git} = undef;
+	PublicInbox::Inbox::weaken_task;
+	$git = $ibx->git;
+	isnt($old, "$git", 'got new Git object');
+	is("$git->{-httpbackend_limiter}", "$lim", 'same limiter');
+	is($lim->{max}, 3, 'limiter has expected slots');
+}
+
+done_testing;
-- 
EW


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-07-09  3:18 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-07-09  3:18 [PATCH 0/6] bunch of cleanups and a new feature! Eric Wong
2016-07-09  3:18 ` [PATCH 1/6] www: drop unused constants Eric Wong
2016-07-09  3:18 ` [PATCH 2/6] www: cleanup parameter passing Eric Wong
2016-07-09  3:18 ` [PATCH 3/6] feed: remove dead code and unneeded use Eric Wong
2016-07-09  3:18 ` [PATCH 4/6] cleanup some unnecessary use/requires Eric Wong
2016-07-09  3:18 ` [PATCH 5/6] qspawn: allow configurable limiters Eric Wong
2016-07-09  3:18 ` [PATCH 6/6] www: add " Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).