* [PATCH 1/6] inbox: drop psgi.url_scheme requirement from base_url
2019-01-31 10:51 [PATCH 0/6] more minor cleanups and doc updates Eric Wong
@ 2019-01-31 10:51 ` Eric Wong
2019-01-31 10:51 ` [PATCH 2/6] qspawn: documentation updates Eric Wong
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2019-01-31 10:51 UTC (permalink / raw)
To: meta
This will make it easier to make command-line tools
from SolverGit.
---
lib/PublicInbox/Inbox.pm | 4 ++--
t/solver_git.t | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 6fe896f..cde4625 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -202,8 +202,8 @@ sub cloneurl {
sub base_url {
my ($self, $env) = @_;
- if ($env) { # PSGI env
- my $scheme = $env->{'psgi.url_scheme'};
+ my $scheme;
+ if ($env && ($scheme = $env->{'psgi.url_scheme'})) { # PSGI env
my $host_port = $env->{HTTP_HOST} ||
"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
my $url = "$scheme://$host_port". ($env->{SCRIPT_NAME} || '/');
diff --git a/t/solver_git.t b/t/solver_git.t
index 66e6317..8de6398 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -44,7 +44,7 @@ $ibx->{-repo_objs} = [ PublicInbox::Git->new($git_dir) ];
my $res;
my $solver = PublicInbox::SolverGit->new($ibx, sub { $res = $_[0] });
open my $log, '+>>', "$mainrepo/solve.log" or die "open: $!";
-my $psgi_env = { 'psgi.url_scheme' => 'http', HTTP_HOST => 'example.com' };
+my $psgi_env = { 'psgi.errors' => *STDERR };
$solver->solve($psgi_env, $log, '69df7d5', {});
ok($res, 'solved a blob!');
my $wt_git = $res->[0];
--
EW
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/6] qspawn: documentation updates
2019-01-31 10:51 [PATCH 0/6] more minor cleanups and doc updates Eric Wong
2019-01-31 10:51 ` [PATCH 1/6] inbox: drop psgi.url_scheme requirement from base_url Eric Wong
@ 2019-01-31 10:51 ` Eric Wong
2019-01-31 10:51 ` [PATCH 3/6] config: tiny cleanup to use _array() sub Eric Wong
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2019-01-31 10:51 UTC (permalink / raw)
To: meta
This will become critical for future changes to display
git commits, diffs, and trees.
Use "qspawn.wcb" instead of "qspawn.response" to enhance
readability.
---
lib/PublicInbox/Qspawn.pm | 77 +++++++++++++++++++++++++++++++++-----
lib/PublicInbox/ViewVCS.pm | 2 +-
2 files changed, 68 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index 913fac8..509a441 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -1,17 +1,40 @@
-# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# Limits the number of processes spawned
+# Like most Perl modules in public-inbox, this is internal and
+# NOT subject to any stability guarantees! It is only documented
+# for other hackers.
+#
+# This is used to limit the number of processes spawned by the
+# PSGI server, so it acts like a semaphore and queues up extra
+# commands to be run if currently at the limit. Multiple "limiters"
+# may be configured which give inboxes different channels to
+# operate in. This can be useful to ensure smaller inboxes can
+# be cloned while cloning of large inboxes is maxed out.
+#
# This does not depend on Danga::Socket or any other external
-# scheduling mechanism, you just need to call start and finish
-# appropriately
+# scheduling mechanism, you just need to call start() and finish()
+# appropriately. However, public-inbox-httpd (which uses Danga::Socket)
+# will be able to schedule this based on readability of stdout from
+# the spawned process. See GitHTTPBackend.pm and SolverGit.pm for
+# usage examples. It does not depend on any form of threading.
+#
+# This is useful for scheduling CGI execution of both long-lived
+# git-http-backend(1) processes (for "git clone") as well as short-lived
+# processes such as git-apply(1).
+
package PublicInbox::Qspawn;
use strict;
use warnings;
use PublicInbox::Spawn qw(popen_rd);
require Plack::Util;
+
my $def_limiter;
+# declares a command to spawn (but does not spawn it).
+# $cmd is the command to spawn
+# $env is the environ for the child process
+# $opt can include redirects and perhaps other process spawning options
sub new ($$$;) {
my ($class, $cmd, $env, $opt) = @_;
bless { args => [ $cmd, $env, $opt ] }, $class;
@@ -79,6 +102,10 @@ sub _psgi_finish ($$) {
}
}
+# Similar to `backtick` or "qx" ("perldoc -f qx"), it calls $qx_cb with
+# the stdout of the given command when done; but respects the given limiter.
+# $env is the PSGI env. As with ``/qx, only use this when output is small
+# and safe to slurp.
sub psgi_qx {
my ($self, $env, $limiter, $qx_cb) = @_;
my $qx = PublicInbox::Qspawn::Qx->new;
@@ -125,6 +152,28 @@ sub filter_fh ($$) {
});
}
+# Used for streaming the stdout of one process as a PSGI response.
+#
+# $env is the PSGI env.
+# optional keys in $env:
+# $env->{'qspawn.wcb'} - the write callback from the PSGI server
+# optional, use this if you've already
+# captured it elsewhere. If not given,
+# psgi_return will return an anonymous
+# sub for the PSGI server to call
+#
+# $env->{'qspawn.filter'} - filter callback, receives a string as input,
+# undef on EOF
+#
+# $limiter - the Limiter object to use (uses the def_limiter if not given)
+#
+# $parse_hdr - Initial read function; often for parsing CGI header output.
+# It will be given the return value of sysread from the pipe
+# and a string ref of the current buffer. Returns an arrayref
+# for PSGI responses. 2-element arrays in PSGI mean the
+# body will be streamed, later, via writes (push-based) to
+# psgix.io. 3-element arrays mean the body is available
+# immediately (or streamed via ->getline (pull-based)).
sub psgi_return {
my ($self, $env, $limiter, $parse_hdr) = @_;
my ($fh, $rpipe);
@@ -139,8 +188,10 @@ sub psgi_return {
return if !defined($r) && ($!{EINTR} || $!{EAGAIN});
$parse_hdr->($r, \$buf);
};
- my $res = delete $env->{'qspawn.response'};
+
+ my $wcb = delete $env->{'qspawn.wcb'};
my $async = $env->{'pi-httpd.async'};
+
my $cb = sub {
my $r = $rd_hdr->() or return;
$rd_hdr = undef;
@@ -152,16 +203,16 @@ sub psgi_return {
$rpipe->close;
$end->();
}
- $res->($r);
+ $wcb->($r);
} elsif ($async) {
- $fh = $res->($r); # scalar @$r == 2
+ $fh = $wcb->($r); # scalar @$r == 2
$fh = filter_fh($fh, $filter) if $filter;
$async->async_pass($env->{'psgix.io'}, $fh, \$buf);
} else { # for synchronous PSGI servers
require PublicInbox::GetlineBody;
$r->[2] = PublicInbox::GetlineBody->new($rpipe, $end,
$buf, $filter);
- $res->($r);
+ $wcb->($r);
}
};
$limiter ||= $def_limiter ||= PublicInbox::Qspawn::Limiter->new(32);
@@ -175,10 +226,16 @@ sub psgi_return {
}
};
- return $self->start($limiter, $start_cb) if $res;
+ # the caller already captured the PSGI write callback from
+ # the PSGI server, so we can call ->start, here:
+ return $self->start($limiter, $start_cb) if $wcb;
+ # the caller will return this sub to the PSGI server, so
+ # it can set the response callback (that is, for PublicInbox::HTTP,
+ # the chunked_wcb or identity_wcb callback), but other HTTP servers
+ # are supported:
sub {
- ($res) = @_;
+ ($wcb) = @_;
$self->start($limiter, $start_cb);
};
}
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 63731e9..1e8c31f 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -54,7 +54,7 @@ sub stream_large_blob ($$$$) {
my $qsp = PublicInbox::Qspawn->new($cmd);
my @cl = ('Content-Length', $size);
my $env = $ctx->{env};
- $env->{'qspawn.response'} = delete $ctx->{-wcb};
+ $env->{'qspawn.wcb'} = delete $ctx->{-wcb};
$qsp->psgi_return($env, undef, sub {
my ($r, $bref) = @_;
if (!defined $r) { # error
--
EW
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/6] config: tiny cleanup to use _array() sub
2019-01-31 10:51 [PATCH 0/6] more minor cleanups and doc updates Eric Wong
2019-01-31 10:51 ` [PATCH 1/6] inbox: drop psgi.url_scheme requirement from base_url Eric Wong
2019-01-31 10:51 ` [PATCH 2/6] qspawn: documentation updates Eric Wong
@ 2019-01-31 10:51 ` Eric Wong
2019-01-31 10:51 ` [PATCH 4/6] doc/config: user documentation for limiters Eric Wong
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2019-01-31 10:51 UTC (permalink / raw)
To: meta
---
lib/PublicInbox/Config.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index ccfc114..da443e5 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -32,7 +32,7 @@ sub new {
$self->{-code_repos} ||= {}; # nick => PublicInbox::Git object
if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
- $no = [ $no ] if ref($no) ne 'ARRAY';
+ $no = _array($no);
my @domains;
foreach my $n (@$no) {
my @n = split(/\s+/, $n);
--
EW
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/6] doc/config: user documentation for limiters
2019-01-31 10:51 [PATCH 0/6] more minor cleanups and doc updates Eric Wong
` (2 preceding siblings ...)
2019-01-31 10:51 ` [PATCH 3/6] config: tiny cleanup to use _array() sub Eric Wong
@ 2019-01-31 10:51 ` Eric Wong
2019-01-31 10:51 ` [PATCH 5/6] doc/config: document "replyto" configuration knob Eric Wong
2019-01-31 10:51 ` [PATCH 6/6] doc: remove completed TODO items Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2019-01-31 10:51 UTC (permalink / raw)
To: meta
I've relied on this feature to keep the VPS behind
https://public-inbox.org/git/ from OOM-ing since 2016,
so document it to ensure others can make use of low-end
servers like I do.
More limiters may become configurable for viewvcs and
solver functionality (or we continue using the default
one).
---
Documentation/public-inbox-config.pod | 59 +++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index 23ebcc5..cad0c4e 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -91,6 +91,18 @@ C<nntp://news.gmane.org/gmane.mail.public-inbox.general>
Default: none
+=item publicinbox.<name>.httpbackendmax
+
+If a digit, the maximum number of parallel
+L<git-http-backend(1)> processes to allow for cloning this
+particular inbox.
+
+If an alphanumeric value starting with a lowercase alphabetic
+character is specified, the inbox will use a L</NAMED LIMITER>
+which can be shared by multiple inboxes.
+
+Default: 32 (using a default limiter shared by all inboxes)
+
=item publicinbox.<name>.coderepo
The nickname of a "coderepo" section associated with the inbox.
@@ -169,6 +181,53 @@ Default: none
=back
+=head2 NAMED LIMITER (PSGI)
+
+Named limiters are useful for preventing large inboxes from
+monopolizing (or overloading) the server. Since serving git
+clones (via L<git-http-backend(1)>) can be memory-intensive for
+large inboxes, it makes sense to put large inboxes on a named
+limiter with a low max value; while smaller inboxes can use
+the default limiter.
+
+=over 8
+
+=item publicinboxlimiter.<name>.max
+
+The maximum number of parallel processes for the given limiter.
+
+=back
+
+=head3 EXAMPLE WITH NAMED LIMITERS
+
+ ; big inboxes which require lots of memory to clone:
+ [publicinbox "big1"]
+ mainrepo = /path/to/big1
+ address = big1@example.com
+ httpbackendmax = big
+ [publicinbox "big2"]
+ mainrepo = /path/to/big2
+ address = big2@example.com
+ httpbackendmax = big
+
+ ; tiny inboxes which are easily cloned:
+ [publicinbox "tiny1"]
+ mainrepo = /path/to/tiny1
+ address = tiny1@example.com
+ [publicinbox "tiny2"]
+ mainrepo = /path/to/tiny2
+ address = tiny2@example.com
+
+ [publicinboxlimiter "big"]
+ max = 4
+
+In the above example, the "big1" and "big2" are limited to four
+parallel L<git-http-backend(1)> processes between them.
+
+However, "tiny1" and "tiny2" will share the default limiter
+which means there can be 32 L<git-http-backend(1)> processes
+between them.
+
=head1 ENVIRONMENT
=over 8
--
EW
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/6] doc/config: document "replyto" configuration knob
2019-01-31 10:51 [PATCH 0/6] more minor cleanups and doc updates Eric Wong
` (3 preceding siblings ...)
2019-01-31 10:51 ` [PATCH 4/6] doc/config: user documentation for limiters Eric Wong
@ 2019-01-31 10:51 ` Eric Wong
2019-01-31 10:51 ` [PATCH 6/6] doc: remove completed TODO items Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2019-01-31 10:51 UTC (permalink / raw)
To: meta
I hate it, but it's necessary to support some mirrors.
---
Documentation/public-inbox-config.pod | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index cad0c4e..27d27e4 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -112,6 +112,25 @@ link to the line numbers of blobs.
Default: none
+=item publicinbox.<name>.replyto
+
+May be used to control how reply instructions in the PSGI
+interface are displayed.
+
+":none=dead inbox" may be specified to denote an inactive list
+("dead inbox" may be replaced with another phrase).
+
+A list of comma-delimited email addresses may be specified.
+This can be useful for dedicated inboxes for bot emails, but
+discussion happens on a separate mailing list/inbox.
+
+Mirrors of existing centralized mailing lists may use ":list"
+here to redirect mail only to the configured inbox address.
+The use of ":list" is discouraged for new mailing lists, as it
+leads to centralization.
+
+Default: :all
+
=item publicinbox.css
The local path name of a CSS file for the PSGI web interface.
--
EW
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/6] doc: remove completed TODO items
2019-01-31 10:51 [PATCH 0/6] more minor cleanups and doc updates Eric Wong
` (4 preceding siblings ...)
2019-01-31 10:51 ` [PATCH 5/6] doc/config: document "replyto" configuration knob Eric Wong
@ 2019-01-31 10:51 ` Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2019-01-31 10:51 UTC (permalink / raw)
To: meta
---
TODO | 5 -----
lib/PublicInbox/ViewVCS.pm | 1 -
2 files changed, 6 deletions(-)
diff --git a/TODO b/TODO
index 57ea8c4..05e2640 100644
--- a/TODO
+++ b/TODO
@@ -29,8 +29,6 @@ all need to be considered for everything we introduce)
* Combined "super server" for NNTP/HTTP/POP3 to reduce memory overhead
-* Optional reply-to-nobody for dead lists.
-
* Configurable linkification for per-inbox shorthands:
"$gmane/123456" could be configured to expand to the
appropriate link pointing to the gmane.org list archives,
@@ -78,9 +76,6 @@ all need to be considered for everything we introduce)
* large mbox/Maildir/MH/NNTP spool import (see PublicInbox::Import)
-* Optionally allow indexing Xapian without positional information to
- save space (but prevents "quoted phrase" searching).
-
* Allow NNTP and more of PSGI code to work without Xapian
* Read-only WebDAV interface to the git repo so it can be mounted
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 1e8c31f..eecc51e 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -96,7 +96,6 @@ sub solve_result {
my $raw_link = "(<a\nhref=$path>raw</a>)";
if ($size > $max_size) {
return stream_large_blob($ctx, $res, \$log, $fn) if defined $fn;
- # TODO: stream the raw file if it's gigantic, at least
$log = "<pre><b>Too big to show, download available</b>\n" .
"$oid $type $size bytes $raw_link</pre>" . $log;
return html_page($ctx, 500, \$log);
--
EW
^ permalink raw reply related [flat|nested] 7+ messages in thread