* [PATCH 01/37] view: disable bold in topic display
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 02/37] hval: force monospace for <form> elements, too Eric Wong
` (35 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
It seems pointless due to the indentation, and interacts
badly with some CSS colouring.
---
lib/PublicInbox/View.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index cd125e0..470e3ab 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -1075,7 +1075,7 @@ sub dump_topics {
my $mbox = qq(<a\nhref="$href/t.mbox.gz">mbox.gz</a>);
my $atom = qq(<a\nhref="$href/t.atom">Atom</a>);
- my $s = "<a\nhref=\"$href/T/$anchor\"><b>$top</b></a>\n" .
+ my $s = "<a\nhref=\"$href/T/$anchor\">$top</a>\n" .
" $ds UTC $n - $mbox / $atom\n";
for (my $i = 0; $i < scalar(@ex); $i += 2) {
my $level = $ex[$i];
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 02/37] hval: force monospace for <form> elements, too
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
2019-01-21 20:52 ` [PATCH 01/37] view: disable bold in topic display Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 03/37] t/perf-msgview: add test to check msg_html performance Eric Wong
` (34 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Same reasoning as commit 7b7885fc3be2719c068c0a2fc860d53f17a1d933,
because GUI browsers have a tendency to use a different
font-family (and thus different size) than the rest of the page.
---
lib/PublicInbox/Hval.pm | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index ccfa324..a120a29 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -18,8 +18,12 @@ our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/;
# browsers (tested both Firefox and surf (webkit)) uses a larger font
# for the Search <form> element than the rest of the page. Font size
# uniformity is important to people who rely on gigantic fonts.
+# Finally, we use monospace to ensure the Search field and button
+# has the same size and spacing as everything else which is
+# <pre>-formatted anyways.
use constant STYLE =>
- '<style>pre{white-space:pre-wrap}*{font-size:100%}</style>';
+ '<style>pre{white-space:pre-wrap}' .
+ '*{font-size:100%;font-family:monospace}</style>';
my $enc_ascii = find_encoding('us-ascii');
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 03/37] t/perf-msgview: add test to check msg_html performance
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
2019-01-21 20:52 ` [PATCH 01/37] view: disable bold in topic display Eric Wong
2019-01-21 20:52 ` [PATCH 02/37] hval: force monospace for <form> elements, too Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 04/37] solver: initial Perl implementation Eric Wong
` (33 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
This will be necessary to ensure we maintain reasonable
performance when we add diff-highlighting support.
---
MANIFEST | 1 +
t/perf-msgview.t | 50 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
create mode 100644 t/perf-msgview.t
diff --git a/MANIFEST b/MANIFEST
index e4f3df8..dfd9e27 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -185,6 +185,7 @@ t/nntp.t
t/nntpd.t
t/nulsubject.t
t/over.t
+t/perf-msgview.t
t/perf-nntpd.t
t/perf-threading.t
t/plack.t
diff --git a/t/perf-msgview.t b/t/perf-msgview.t
new file mode 100644
index 0000000..adeb7aa
--- /dev/null
+++ b/t/perf-msgview.t
@@ -0,0 +1,50 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Benchmark qw(:all);
+use PublicInbox::Inbox;
+use PublicInbox::View;
+require './t/common.perl';
+
+my @cat = qw(cat-file --buffer --batch-check --batch-all-objects);
+if (require_git(2.19, 1)) {
+ push @cat, '--unordered';
+} else {
+ warn
+"git <2.19, cat-file lacks --unordered, locality suffers\n";
+}
+
+my $pi_dir = $ENV{GIANT_PI_DIR};
+plan skip_all => "GIANT_PI_DIR not defined for $0" unless $pi_dir;
+
+my $ibx = PublicInbox::Inbox->new({ mainrepo => $pi_dir, name => 'name' });
+my $git = $ibx->git;
+my $fh = $git->popen(@cat);
+my $vec = '';
+vec($vec, fileno($fh), 1) = 1;
+select($vec, undef, undef, 60) or die "timed out waiting for --batch-check";
+
+my $ctx = {
+ env => { HTTP_HOST => 'example.com', 'psgi.url_scheme' => 'https' },
+ -inbox => $ibx,
+};
+my ($str, $mime, $res, $cmt, $type);
+my $n = 0;
+my $t = timeit(1, sub {
+ while (<$fh>) {
+ ($cmt, $type) = split / /;
+ next if $type ne 'blob';
+ ++$n;
+ $str = $git->cat_file($cmt);
+ $mime = PublicInbox::MIME->new($str);
+ $res = PublicInbox::View::msg_html($ctx, $mime);
+ $res = $res->[2];
+ while (defined($res->getline)) {}
+ $res->close;
+ }
+});
+diag 'msg_html took '.timestr($t)." for $n messages";
+ok 1;
+done_testing();
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 04/37] solver: initial Perl implementation
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (2 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 03/37] t/perf-msgview: add test to check msg_html performance Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 05/37] git: support multiple URL endpoints Eric Wong
` (32 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
This will look up git blobs from associated git source code
repositories. If the blobs can't be found, an attempt to
"solve" them via patch application will be performed.
Eventually, this may become the basis of a type-agnostic
frontend similar to "git show".
---
MANIFEST | 4 +
lib/PublicInbox/Git.pm | 16 +
lib/PublicInbox/SolverGit.pm | 400 +++++++++++++++++++
t/solve/0001-simple-mod.patch | 20 +
t/solve/0002-rename-with-modifications.patch | 37 ++
t/solver_git.t | 91 +++++
6 files changed, 568 insertions(+)
create mode 100644 lib/PublicInbox/SolverGit.pm
create mode 100644 t/solve/0001-simple-mod.patch
create mode 100644 t/solve/0002-rename-with-modifications.patch
create mode 100644 t/solver_git.t
diff --git a/MANIFEST b/MANIFEST
index dfd9e27..95ad0c6 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -101,6 +101,7 @@ lib/PublicInbox/SearchIdxPart.pm
lib/PublicInbox/SearchMsg.pm
lib/PublicInbox/SearchThread.pm
lib/PublicInbox/SearchView.pm
+lib/PublicInbox/SolverGit.pm
lib/PublicInbox/Spamcheck.pm
lib/PublicInbox/Spamcheck/Spamc.pm
lib/PublicInbox/Spawn.pm
@@ -201,6 +202,9 @@ t/qspawn.t
t/reply.t
t/search-thr-index.t
t/search.t
+t/solve/0001-simple-mod.patch
+t/solve/0002-rename-with-modifications.patch
+t/solver_git.t
t/spamcheck_spamc.t
t/spawn.t
t/thread-cycle.t
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 90b9214..9676086 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -40,6 +40,7 @@ sub new {
my ($class, $git_dir) = @_;
my @st;
$st[7] = $st[10] = 0;
+ # may contain {-wt} field (working-tree (File::Temp::Dir))
bless { git_dir => $git_dir, st => \@st }, $class
}
@@ -201,6 +202,21 @@ sub packed_bytes {
sub DESTROY { cleanup(@_) }
+# show the blob URL for cgit/gitweb/whatever
+sub src_blob_url {
+ my ($self, $oid) = @_;
+ # blob_fmt = "https://example.com/foo.git/blob/%s"
+ if (my $bfu = $self->{blob_fmt_url}) {
+ return sprintf($bfu, $oid);
+ }
+
+ # don't show full FS path, basename should be OK:
+ if ($self->{git_dir} =~ m!/([^/]+)\z!) {
+ return "/path/to/$1";
+ }
+ '???';
+}
+
1;
__END__
=pod
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
new file mode 100644
index 0000000..f28768a
--- /dev/null
+++ b/lib/PublicInbox/SolverGit.pm
@@ -0,0 +1,400 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "Solve" blobs which don't exist in git code repositories by
+# searching inboxes for post-image blobs.
+
+# this emits a lot of debugging/tracing information which may be
+# publically viewed over HTTP(S). Be careful not to expose
+# local filesystem layouts in the process.
+package PublicInbox::SolverGit;
+use strict;
+use warnings;
+use File::Temp qw();
+use Fcntl qw(SEEK_SET);
+use File::Path qw(make_path);
+use PublicInbox::Git qw(git_unquote);
+use PublicInbox::Spawn qw(spawn popen_rd);
+use PublicInbox::MsgIter qw(msg_iter msg_part_text);
+use URI::Escape qw(uri_escape_utf8);
+
+# don't bother if somebody sends us a patch with these path components,
+# it's junk at best, an attack attempt at worse:
+my %bad_component = map { $_ => 1 } ('', '.', '..');
+
+sub new {
+ my ($class, $gits, $inboxes) = @_;
+ bless {
+ gits => $gits,
+ inboxes => $inboxes,
+ }, $class;
+}
+
+# look for existing blobs already in git repos
+sub solve_existing ($$) {
+ my ($self, $want) = @_;
+ foreach my $git (@{$self->{gits}}) {
+ my ($oid_full, $type, $size) = $git->check($want->{oid_b});
+ if (defined($type) && $type eq 'blob') {
+ return [ $git, $oid_full, $type, int($size) ];
+ }
+ }
+ undef;
+}
+
+# returns a hashref with information about a diff:
+# {
+# oid_a => abbreviated pre-image oid,
+# oid_b => abbreviated post-image oid,
+# tmp => anonymous file handle with the diff,
+# hdr_lines => arrayref of various header lines for mode information
+# mode_a => original mode of oid_a (string, not integer),
+# ibx => PublicInbox::Inbox object containing the diff
+# smsg => PublicInbox::SearchMsg object containing diff
+# path_a => pre-image path
+# path_b => post-image path
+# }
+sub extract_diff ($$$$) {
+ my ($p, $re, $ibx, $smsg) = @_;
+ my ($part) = @$p; # ignore $depth and @idx;
+ my $hdr_lines; # diff --git a/... b/...
+ my $tmp;
+ my $ct = $part->content_type || 'text/plain';
+ my ($s, undef) = msg_part_text($part, $ct);
+ defined $s or return;
+ my $di = {};
+ foreach my $l (split(/^/m, $s)) {
+ if ($l =~ /$re/) {
+ $di->{oid_a} = $1;
+ $di->{oid_b} = $2;
+ my $mode_a = $3;
+ if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
+ $di->{mode_a} = $mode_a;
+ }
+
+ # start writing the diff out to a tempfile
+ open($tmp, '+>', undef) or die "open(tmp): $!";
+ $di->{tmp} = $tmp;
+ $di->{hdr_lines} = $hdr_lines;
+
+ print $tmp @$hdr_lines, $l or die "print(tmp): $!";
+
+ # for debugging/diagnostics:
+ $di->{ibx} = $ibx;
+ $di->{smsg} = $smsg;
+ } elsif ($l =~ m!\Adiff --git ("?a/.+) ("?b/.+)$!) {
+ return $di if $tmp; # got our blob, done!
+
+ my ($path_a, $path_b) = ($1, $2);
+
+ # don't care for leading 'a/' and 'b/'
+ my (undef, @a) = split(m{/}, git_unquote($path_a));
+ my (undef, @b) = split(m{/}, git_unquote($path_b));
+
+ # get rid of path-traversal attempts and junk patches:
+ foreach (@a, @b) {
+ return if $bad_component{$_};
+ }
+
+ $di->{path_a} = join('/', @a);
+ $di->{path_b} = join('/', @b);
+ $hdr_lines = [ $l ];
+ } elsif ($tmp) {
+ print $tmp $l or die "print(tmp): $!";
+ } elsif ($hdr_lines) {
+ push @$hdr_lines, $l;
+ }
+ }
+ $tmp ? $di : undef;
+}
+
+sub path_searchable ($) { defined($_[0]) && $_[0] =~ m!\A[\w/\. \-]+\z! }
+
+sub find_extract_diff ($$$) {
+ my ($self, $ibx, $want) = @_;
+ my $srch = $ibx->search or return;
+
+ my $post = $want->{oid_b} or die 'BUG: no {oid_b}';
+ $post =~ /\A[a-f0-9]+\z/ or die "BUG: oid_b not hex: $post";
+
+ my $q = "dfpost:$post";
+ my $pre = $want->{oid_a};
+ if (defined $pre && $pre =~ /\A[a-f0-9]+\z/) {
+ $q .= " dfpre:$pre";
+ } else {
+ $pre = '[a-f0-9]{7}'; # for $re below
+ }
+
+ my $path_b = $want->{path_b};
+ if (path_searchable($path_b)) {
+ $q .= qq{ dfn:"$path_b"};
+
+ my $path_a = $want->{path_a};
+ if (path_searchable($path_a) && $path_a ne $path_b) {
+ $q .= qq{ dfn:"$path_a"};
+ }
+ }
+
+ my $msgs = $srch->query($q, { relevance => 1 });
+ my $re = qr/\Aindex ($pre[a-f0-9]*)\.\.($post[a-f0-9]*)(?: (\d+))?/;
+
+ my $di;
+ foreach my $smsg (@$msgs) {
+ $ibx->smsg_mime($smsg) or next;
+ msg_iter(delete($smsg->{mime}), sub {
+ $di ||= extract_diff($_[0], $re, $ibx, $smsg);
+ });
+ return $di if $di;
+ }
+}
+
+# pure Perl "git init"
+sub do_git_init_wt ($) {
+ my ($self) = @_;
+ my $wt = File::Temp->newdir('solver.wt-XXXXXXXX', TMPDIR => 1);
+ my $dir = $wt->dirname;
+
+ foreach (qw(objects/info refs/heads)) {
+ make_path("$dir/.git/$_") or die "make_path $_: $!";
+ }
+ open my $fh, '>', "$dir/.git/config" or die "open .git/config: $!";
+ print $fh <<'EOF' or die "print .git/config $!";
+[core]
+ repositoryFormatVersion = 0
+ filemode = true
+ bare = false
+ fsyncObjectfiles = false
+ logAllRefUpdates = false
+EOF
+ close $fh or die "close .git/config: $!";
+
+ open $fh, '>', "$dir/.git/HEAD" or die "open .git/HEAD: $!";
+ print $fh "ref: refs/heads/master\n" or die "print .git/HEAD: $!";
+ close $fh or die "close .git/HEAD: $!";
+
+ my $f = '.git/objects/info/alternates';
+ open $fh, '>', "$dir/$f" or die "open: $f: $!";
+ foreach my $git (@{$self->{gits}}) {
+ print $fh "$git->{git_dir}/objects\n" or die "print $f: $!";
+ }
+ close $fh or die "close: $f: $!";
+ $wt;
+}
+
+sub extract_old_mode ($) {
+ my ($di) = @_;
+ if (grep(/\Aold mode (100644|100755|120000)$/, @{$di->{hdr_lines}})) {
+ return $1;
+ }
+ '100644';
+}
+
+sub reap ($$) {
+ my ($pid, $msg) = @_;
+ waitpid($pid, 0) == $pid or die "waitpid($msg): $!";
+ $? == 0 or die "$msg failed: $?";
+}
+
+sub prepare_wt ($$$) {
+ my ($wt_dir, $existing, $di) = @_;
+ my $oid_full = $existing->[1];
+ my ($r, $w);
+ my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
+ my $mode_a = $di->{mode_a} || extract_old_mode($di);
+ my @git = (qw(git -C), $wt_dir);
+
+ pipe($r, $w) or die "pipe: $!";
+ my $rdr = { 0 => fileno($r) };
+ my $pid = spawn([@git, qw(update-index -z --index-info)], {}, $rdr);
+ close $r or die "close pipe(r): $!";
+ print $w "$mode_a $oid_full\t$path_a\0" or die "print update-index: $!";
+ close $w or die "close update-index: $!";
+ reap($pid, 'update-index -z --index-info');
+
+ $pid = spawn([@git, qw(checkout-index -a -f -u)]);
+ reap($pid, 'checkout-index -a -f -u');
+}
+
+sub do_apply ($$$$) {
+ my ($out, $wt_git, $wt_dir, $di) = @_;
+
+ my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_info($di);
+ $tmp->flush or die "tmp->flush failed: $!";
+ $out->flush or die "err->flush failed: $!";
+ sysseek($tmp, 0, SEEK_SET) or die "sysseek(tmp) failed: $!";
+
+ defined(my $err_fd = fileno($out)) or die "fileno(out): $!";
+ my $rdr = { 0 => fileno($tmp), 1 => $err_fd, 2 => $err_fd };
+ my $cmd = [ qw(git -C), $wt_dir,
+ qw(apply --whitespace=warn -3 --verbose) ];
+ reap(spawn($cmd, undef, $rdr), 'apply');
+
+ local $/ = "\0";
+ my $rd = popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+
+ defined(my $line = <$rd>) or die "failed to read ls-files: $!";
+ chomp $line or die "no trailing \\0 in [$line] from ls-files";
+
+ my ($info, $file) = split(/\t/, $line, 2);
+ my ($mode_b, $oid_b_full, $stage) = split(/ /, $info);
+
+ defined($line = <$rd>) and die "extra files in index: $line";
+ close $rd or die "close ls-files: $?";
+
+ $file eq $di->{path_b} or
+ die "index mismatch: file=$file != path_b=$di->{path_b}";
+ my $abs_path = "$wt_dir/$file";
+ -r $abs_path or die "WT_DIR/$file not readable";
+ my $size = -s _;
+
+ print $out "OK $mode_b $oid_b_full $stage\t$file\n";
+ [ $wt_git, $oid_b_full, 'blob', $size, $di ];
+}
+
+sub di_url ($) {
+ my ($di) = @_;
+ # note: we don't pass the PSGI env here, different inboxes
+ # can have different HTTP_HOST on the same instance.
+ my $url = $di->{ibx}->base_url;
+ my $mid = $di->{smsg}->{mid};
+ defined($url) ? "<$url/$mid/>" : "<$mid>";
+}
+
+sub apply_patches ($$$$$) {
+ my ($self, $out, $wt, $found, $patches) = @_;
+ my $wt_dir = $wt->dirname;
+ my $wt_git = PublicInbox::Git->new("$wt_dir/.git");
+ $wt_git->{-wt} = $wt;
+
+ my $cur = 0;
+ my $tot = scalar @$patches;
+
+ foreach my $di (@$patches) {
+ my $i = ++$cur;
+ my $oid_a = $di->{oid_a};
+ my $existing = $found->{$oid_a};
+ my $empty_oid = $oid_a =~ /\A0+\z/;
+
+ if ($empty_oid && $i != 0) {
+ die "empty oid at [$i/$tot] ", di_url($di);
+ }
+ if (!$existing && !$empty_oid) {
+ die "missing $oid_a at [$i/$tot] ", di_url($di);
+ }
+
+ # prepare the worktree for patch application:
+ if ($i == 1 && $existing) {
+ prepare_wt($wt_dir, $existing, $di);
+ }
+ unless (-f "$wt_dir/$di->{path_a}") {
+ die "missing $di->{path_a} at [$i/$tot] ", di_url($di);
+ }
+
+ print $out "applying [$i/$tot] ", di_url($di), "\n",
+ join('', @{$di->{hdr_lines}}), "\n"
+ or die "print \$out failed: $!";
+
+ # apply the patch!
+ $found->{$di->{oid_b}} = do_apply($out, $wt_git, $wt_dir, $di);
+ }
+}
+
+sub dump_found ($$) {
+ my ($out, $found) = @_;
+ foreach my $oid (sort keys %$found) {
+ my ($git, $oid, $di) = @{$found->{$oid}};
+ my $loc = $di ? di_info($di) : $git->src_blob_url($oid);
+ print $out "$oid from $loc\n";
+ }
+}
+
+sub dump_patches ($$) {
+ my ($out, $patches) = @_;
+ my $tot = scalar(@$patches);
+ my $i = 0;
+ foreach my $di (@$patches) {
+ ++$i;
+ print $out "[$i/$tot] ", di_url($di), "\n";
+ }
+}
+
+# recreate $oid_b
+# Returns a 2-element array ref: [ PublicInbox::Git object, oid_full ]
+# or undef if nothing was found.
+sub solve ($$$$) {
+ my ($self, $out, $oid_b, $hints) = @_;
+
+ # should we even get here? Probably not, but somebody
+ # could be manually typing URLs:
+ return if $oid_b =~ /\A0+\z/;
+
+ my $req = { %$hints, oid_b => $oid_b };
+ my @todo = ($req);
+ my $found = {}; # { oid_abbrev => [ PublicInbox::Git, oid_full, $di ] }
+ my $patches = []; # [ array of $di hashes ]
+
+ my $max = $self->{max_steps} || 200;
+ my $steps = 0;
+
+ while (defined(my $want = pop @todo)) {
+ # see if we can find the blob in an existing git repo:
+ if (my $existing = solve_existing($self, $want)) {
+ my $want_oid = $want->{oid_b};
+ return $existing if $want_oid eq $oid_b; # DONE!
+
+ $found->{$want_oid} = $existing;
+ next; # ok, one blob resolved, more to go?
+ }
+
+ # scan through inboxes to look for emails which results in
+ # the oid we want:
+ foreach my $ibx (@{$self->{inboxes}}) {
+ my $di = find_extract_diff($self, $ibx, $want) or next;
+
+ unshift @$patches, $di;
+
+ # good, we can find a path to the oid we $want, now
+ # lets see if we need to apply more patches:
+ my $src = $di->{oid_a};
+ if ($src !~ /\A0+\z/) {
+ if (++$steps > $max) {
+ print $out
+"Aborting, too many steps to $oid_b\n";
+
+ return;
+ }
+
+ # we have to solve it using another oid, fine:
+ my $job = {
+ oid_b => $src,
+ path_b => $di->{path_a},
+ };
+ push @todo, $job;
+ }
+ last; # onto the next @todo item
+ }
+ }
+
+ unless (scalar(@$patches)) {
+ print $out "no patch(es) for $oid_b\n";
+ dump_found($out, $found);
+ return;
+ }
+
+ # reconstruct the oid_b blob using patches we found:
+ eval {
+ my $wt = do_git_init_wt($self);
+ apply_patches($self, $out, $wt, $found, $patches);
+ };
+ if ($@) {
+ print $out "E: $@\nfound: ";
+ dump_found($out, $found);
+ print $out "patches: ";
+ dump_patches($out, $patches);
+ return;
+ }
+
+ $found->{$oid_b};
+}
+
+1;
diff --git a/t/solve/0001-simple-mod.patch b/t/solve/0001-simple-mod.patch
new file mode 100644
index 0000000..c6bb157
--- /dev/null
+++ b/t/solve/0001-simple-mod.patch
@@ -0,0 +1,20 @@
+From: WEB DESIGN EXPERT <BOFH@YHBT.net>
+To: meta@public-inbox.org
+Subject: [PATCH] TODO: take expert web design advice
+Date: Mon, 1 Apr 2019 08:15:20 +0000
+Message-Id: <20190401081523.16213-1-BOFH@YHBT.net>
+
+---
+ TODO | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/TODO b/TODO
+index 605013e..69df7d5 100644
+--- a/TODO
++++ b/TODO
+@@ -109,3 +109,5 @@ all need to be considered for everything we introduce)
+
+ * Optional history squashing to reduce commit and intermediate
+ tree objects
++
++ * Make use of <blink> and <marquee> tags
diff --git a/t/solve/0002-rename-with-modifications.patch b/t/solve/0002-rename-with-modifications.patch
new file mode 100644
index 0000000..aa415e0
--- /dev/null
+++ b/t/solve/0002-rename-with-modifications.patch
@@ -0,0 +1,37 @@
+From: POLITICAL CORRECTNESS EXPERT <BOFH@YHBT.net>
+To: meta@public-inbox.org
+Subject: [PATCH] POLITICALLY CORRECT FILE NAMING
+Date: Mon, 1 Apr 2019 08:15:20 +0000
+Message-Id: <20190401081523.16213-2-BOFH@YHBT.net>
+
+HACKING MIGHT GET US REPORTED TO EFF-BEE-EYE
+AND USE MARKDOWN CUZ MOAR FLAVORS == BETTER
+---
+ HACKING => CONTRIBUTING.md | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+ rename HACKING => CONTRIBUTING.md (94%)
+
+diff --git a/HACKING b/CONTRIBUTING.md
+similarity index 94%
+rename from HACKING
+rename to CONTRIBUTING.md
+index 3435775..0a92431 100644
+--- a/HACKING
++++ b/CONTRIBUTING.md
+@@ -1,5 +1,5 @@
+-hacking public-inbox
+---------------------
++contributing to public-inbox
++----------------------------
+
+ Send all patches and "git request-pull"-formatted emails to our
+ self-hosting inbox at meta@public-inbox.org
+@@ -15,7 +15,7 @@ Please consider our goals in mind:
+ Decentralization, Accessibility, Compatibility, Performance
+
+ These goals apply to everyone: users viewing over the web or NNTP,
+-sysadmins running public-inbox, and other hackers working public-inbox.
++sysadmins running public-inbox, and other contributors working public-inbox.
+
+ We will reject any feature which advocates or contributes to any
+ particular instance of a public-inbox becoming a single point of failure.
diff --git a/t/solver_git.t b/t/solver_git.t
new file mode 100644
index 0000000..fe322ea
--- /dev/null
+++ b/t/solver_git.t
@@ -0,0 +1,91 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw(tempdir);
+use Cwd qw(abs_path);
+require './t/common.perl';
+require_git(2.6);
+
+my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+ URI::Escape Plack::Builder);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for $0" if $@;
+}
+chomp(my $git_dir = `git rev-parse --git-dir 2>/dev/null`);
+plan skip_all => "$0 must be run from a git working tree" if $?;
+$git_dir = abs_path($git_dir);
+
+use_ok "PublicInbox::$_" for (qw(Inbox V2Writable MIME Git SolverGit));
+
+my $mainrepo = tempdir('pi-solver-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $opts = {
+ mainrepo => $mainrepo,
+ name => 'test-v2writable',
+ version => 2,
+ -primary_address => 'test@example.com',
+};
+my $ibx = PublicInbox::Inbox->new($opts);
+my $im = PublicInbox::V2Writable->new($ibx, 1);
+$im->{parallel} = 0;
+
+sub deliver_patch ($) {
+ open my $fh, '<', $_[0] or die "open: $!";
+ my $mime = PublicInbox::MIME->new(do { local $/; <$fh> });
+ $im->add($mime);
+ $im->done;
+}
+
+deliver_patch('t/solve/0001-simple-mod.patch');
+
+my $gits = [ PublicInbox::Git->new($git_dir) ];
+my $solver = PublicInbox::SolverGit->new($gits, [ $ibx ]);
+open my $log, '+>>', "$mainrepo/solve.log" or die "open: $!";
+my $res = $solver->solve($log, '69df7d5', {});
+ok($res, 'solved a blob!');
+my $wt_git = $res->[0];
+is(ref($wt_git), 'PublicInbox::Git', 'got a git object for the blob');
+my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0';
+is($res->[1], $expect, 'resolved blob to unabbreviated identifier');
+is($res->[2], 'blob', 'type specified');
+is($res->[3], 4405, 'size returned');
+
+is(ref($wt_git->cat_file($res->[1])), 'SCALAR', 'wt cat-file works');
+is_deeply([$expect, 'blob', 4405],
+ [$wt_git->check($res->[1])], 'wt check works');
+
+if (0) { # TODO: check this?
+ seek($log, 0, 0);
+ my $z = do { local $/; <$log> };
+ diag $z;
+}
+
+$res = undef;
+my $wt_git_dir = $wt_git->{git_dir};
+$wt_git = undef;
+ok(!-d $wt_git_dir, 'no references to WT held');
+
+$res = $solver->solve($log, '0'x40, {});
+is($res, undef, 'no error on z40');
+
+my $git_v2_20_1_tag = '7a95a1cd084cb665c5c2586a415e42df0213af74';
+$res = $solver->solve($log, $git_v2_20_1_tag, {});
+is($res, undef, 'no error on a tag not in our repo');
+
+deliver_patch('t/solve/0002-rename-with-modifications.patch');
+$res = $solver->solve($log, '0a92431', {});
+ok($res, 'resolved without hints');
+
+my $hints = {
+ oid_a => '3435775',
+ path_a => 'HACKING',
+ path_b => 'CONTRIBUTING'
+};
+my $hinted = $solver->solve($log, '0a92431', $hints);
+# don't compare ::Git objects:
+shift @$res; shift @$hinted;
+is_deeply($res, $hinted, 'hints work (or did not hurt :P');
+
+done_testing();
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 05/37] git: support multiple URL endpoints
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (3 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 04/37] solver: initial Perl implementation Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 06/37] git: add git_quote Eric Wong
` (31 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
For redundancy and centralization resistance.
---
lib/PublicInbox/Git.pm | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 9676086..a270180 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -202,19 +202,33 @@ sub packed_bytes {
sub DESTROY { cleanup(@_) }
+sub local_nick ($) {
+ my ($self) = @_;
+ my $ret = '???';
+ # don't show full FS path, basename should be OK:
+ if ($self->{git_dir} =~ m!/([^/]+)(?:/\.git)?\z!) {
+ $ret = "/path/to/$1";
+ }
+ wantarray ? ($ret) : $ret;
+}
+
# show the blob URL for cgit/gitweb/whatever
sub src_blob_url {
my ($self, $oid) = @_;
- # blob_fmt = "https://example.com/foo.git/blob/%s"
- if (my $bfu = $self->{blob_fmt_url}) {
- return sprintf($bfu, $oid);
+ # blob_url_format = "https://example.com/foo.git/blob/%s"
+ if (my $bfu = $self->{blob_url_format}) {
+ return map { sprintf($_, $oid) } @$bfu if wantarray;
+ return sprintf($bfu->[0], $oid);
}
+ local_nick($self);
+}
- # don't show full FS path, basename should be OK:
- if ($self->{git_dir} =~ m!/([^/]+)\z!) {
- return "/path/to/$1";
+sub pub_urls {
+ my ($self) = @_;
+ if (my $urls = $self->{cgit_url}) {
+ return @$urls;
}
- '???';
+ local_nick($self);
}
1;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 06/37] git: add git_quote
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (4 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 05/37] git: support multiple URL endpoints Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 07/37] git: check saves error on disambiguation Eric Wong
` (30 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
It'll be helpful for displaying progress in SolverGit
output.
---
lib/PublicInbox/Git.pm | 12 +++++++++++-
t/git.t | 7 ++++++-
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a270180..d0ac6b6 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -13,7 +13,7 @@ use POSIX qw(dup2);
require IO::Handle;
use PublicInbox::Spawn qw(spawn popen_rd);
use base qw(Exporter);
-our @EXPORT_OK = qw(git_unquote);
+our @EXPORT_OK = qw(git_unquote git_quote);
my %GIT_ESC = (
a => "\a",
@@ -26,6 +26,8 @@ my %GIT_ESC = (
'"' => '"',
'\\' => '\\',
);
+my %ESC_GIT = map { $GIT_ESC{$_} => $_ } keys %GIT_ESC;
+
# unquote pathnames used by git, see quote.c::unquote_c_style.c in git.git
sub git_unquote ($) {
@@ -36,6 +38,14 @@ sub git_unquote ($) {
$_[0];
}
+sub git_quote ($) {
+ if ($_[0] =~ s/([\\"\a\b\f\n\r\t\013]|[^[:print:]])/
+ '\\'.($ESC_GIT{$1}||sprintf("%0o",ord($1)))/egs) {
+ return qq{"$_[0]"};
+ }
+ $_[0];
+}
+
sub new {
my ($class, $git_dir) = @_;
my @st;
diff --git a/t/git.t b/t/git.t
index 2d58a10..9c80fbb 100644
--- a/t/git.t
+++ b/t/git.t
@@ -144,11 +144,16 @@ if ('alternates reloaded') {
is($$found, $config, 'alternates reloaded');
}
-use_ok 'PublicInbox::Git', qw(git_unquote);
+use_ok 'PublicInbox::Git', qw(git_unquote git_quote);
my $s;
is("foo\nbar", git_unquote($s = '"foo\\nbar"'), 'unquoted newline');
is("Eléanor", git_unquote($s = '"El\\303\\251anor"'), 'unquoted octal');
is(git_unquote($s = '"I\"m"'), 'I"m', 'unquoted dq');
is(git_unquote($s = '"I\\m"'), 'I\\m', 'unquoted backslash');
+is(git_quote($s = "Eléanor"), '"El\\303\\251anor"', 'quoted octal');
+is(git_quote($s = "hello\"world"), '"hello\"world"', 'quoted dq');
+is(git_quote($s = "hello\\world"), '"hello\\\\world"', 'quoted backslash');
+is(git_quote($s = "hello\nworld"), '"hello\\nworld"', 'quoted LF');
+
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 07/37] git: check saves error on disambiguation
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (5 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 06/37] git: add git_quote Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 08/37] solver: various bugfixes and cleanups Eric Wong
` (29 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
This will be useful for disambiguating short OIDs in older
emails when abbreviations were shorter.
Tested against the following script with /path/to/git.git
==> t.perl <==
use strict;
use PublicInbox::Git;
use Data::Dumper;
my $dir = shift or die "Usage: $0 GIT_DIR # (of git.git)";
my $git = PublicInbox::Git->new($dir);
my @res = $git->check('dead');
print Dumper({res => \@res, err=> $git->last_check_err});
@res = $git->check('5335669531d83d7d6c905bcfca9b5f8e182dc4d4');
print Dumper({res => \@res, err=> $git->last_check_err});
---
lib/PublicInbox/Git.pm | 38 +++++++++++++++++++++++++++++++++++---
1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index d0ac6b6..a61cb31 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -64,9 +64,25 @@ sub alternates_changed {
$self->{st} = \@st;
}
+sub last_check_err {
+ my ($self) = @_;
+ my $fh = $self->{err_c} or return;
+ sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+ defined(sysread($fh, my $buf, -s $fh)) or
+ fail($self, "sysread failed: $!");
+ $buf;
+}
+
sub _bidi_pipe {
- my ($self, $batch, $in, $out, $pid) = @_;
- return if $self->{$pid};
+ my ($self, $batch, $in, $out, $pid, $err) = @_;
+ if ($self->{$pid}) {
+ if (defined $err) { # "err_c"
+ my $fh = $self->{$err};
+ sysseek($fh, 0, 0) or fail($self, "sysseek failed: $!");
+ truncate($fh, 0) or fail($self, "truncate failed: $!");
+ }
+ return;
+ }
my ($in_r, $in_w, $out_r, $out_w);
pipe($in_r, $in_w) or fail($self, "pipe failed: $!");
@@ -78,6 +94,11 @@ sub _bidi_pipe {
my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+ if ($err) {
+ open(my $fh, '+>', undef) or fail($self, "open.err failed: $!");
+ $self->{$err} = $fh;
+ $redir->{2} = fileno($fh);
+ }
my $p = spawn(\@cmd, undef, $redir);
defined $p or fail($self, "spawn failed: $!");
$self->{$pid} = $p;
@@ -152,12 +173,23 @@ sub batch_prepare ($) { _bidi_pipe($_[0], qw(--batch in out pid)) }
sub check {
my ($self, $obj) = @_;
- $self->_bidi_pipe(qw(--batch-check in_c out_c pid_c));
+ _bidi_pipe($self, qw(--batch-check in_c out_c pid_c err_c));
$self->{out_c}->print($obj, "\n") or fail($self, "write error: $!");
local $/ = "\n";
chomp(my $line = $self->{in_c}->getline);
my ($hex, $type, $size) = split(' ', $line);
return if $type eq 'missing';
+
+ # "dead" in git.git shows "dangling 4\ndead\n", not sure why
+ # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/
+ # so handle the oddball stuff just in case
+ if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
+ $size = $type + length("\n");
+ my $r = read($self->{in_c}, my $buf, $size);
+ defined($r) or fail($self, "read failed: $!");
+ return;
+ }
+
($hex, $type, $size);
}
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 08/37] solver: various bugfixes and cleanups
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (6 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 07/37] git: check saves error on disambiguation Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 09/37] view: wire up diff and vcs viewers with solver Eric Wong
` (28 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Remove the make_path dependency and call mkdir directly.
Capture mode on new files, avoid referencing non-existent
functions and enhance the debug output for users to read.
---
lib/PublicInbox/SolverGit.pm | 87 ++++++++++++++++++++++++------------
1 file changed, 58 insertions(+), 29 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index f28768a..d7209e6 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -12,8 +12,7 @@ use strict;
use warnings;
use File::Temp qw();
use Fcntl qw(SEEK_SET);
-use File::Path qw(make_path);
-use PublicInbox::Git qw(git_unquote);
+use PublicInbox::Git qw(git_unquote git_quote);
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::MsgIter qw(msg_iter msg_part_text);
use URI::Escape qw(uri_escape_utf8);
@@ -31,15 +30,31 @@ sub new {
}
# look for existing blobs already in git repos
-sub solve_existing ($$) {
- my ($self, $want) = @_;
+sub solve_existing ($$$) {
+ my ($self, $out, $want) = @_;
+ my $oid_b = $want->{oid_b};
+ my @ambiguous; # Array of [ git, $oids]
foreach my $git (@{$self->{gits}}) {
- my ($oid_full, $type, $size) = $git->check($want->{oid_b});
+ my ($oid_full, $type, $size) = $git->check($oid_b);
if (defined($type) && $type eq 'blob') {
return [ $git, $oid_full, $type, int($size) ];
}
+
+ next if length($oid_b) == 40;
+
+ # parse stderr of "git cat-file --batch-check"
+ my $err = $git->last_check_err;
+ my (@oids) = ($err =~ /\b([a-f0-9]{40})\s+blob\b/g);
+ next unless scalar(@oids);
+
+ # TODO: do something with the ambiguous array?
+ # push @ambiguous, [ $git, @oids ];
+
+ print $out "`$oid_b' ambiguous in ",
+ join("\n", $git->pub_urls), "\n",
+ join('', map { "$_ blob\n" } @oids), "\n";
}
- undef;
+ scalar(@ambiguous) ? \@ambiguous : undef;
}
# returns a hashref with information about a diff:
@@ -64,19 +79,22 @@ sub extract_diff ($$$$) {
defined $s or return;
my $di = {};
foreach my $l (split(/^/m, $s)) {
- if ($l =~ /$re/) {
+ if ($l =~ $re) {
$di->{oid_a} = $1;
$di->{oid_b} = $2;
- my $mode_a = $3;
- if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
- $di->{mode_a} = $mode_a;
+ if (defined($3)) {
+ my $mode_a = $3;
+ if ($mode_a =~ /\A(?:100644|120000|100755)\z/) {
+ $di->{mode_a} = $mode_a;
+ }
}
# start writing the diff out to a tempfile
open($tmp, '+>', undef) or die "open(tmp): $!";
$di->{tmp} = $tmp;
- $di->{hdr_lines} = $hdr_lines;
+ push @$hdr_lines, $l;
+ $di->{hdr_lines} = $hdr_lines;
print $tmp @$hdr_lines, $l or die "print(tmp): $!";
# for debugging/diagnostics:
@@ -103,6 +121,9 @@ sub extract_diff ($$$$) {
print $tmp $l or die "print(tmp): $!";
} elsif ($hdr_lines) {
push @$hdr_lines, $l;
+ if ($l =~ /\Anew file mode (100644|120000|100755)$/) {
+ $di->{mode_a} = $1;
+ }
}
}
$tmp ? $di : undef;
@@ -154,8 +175,8 @@ sub do_git_init_wt ($) {
my $wt = File::Temp->newdir('solver.wt-XXXXXXXX', TMPDIR => 1);
my $dir = $wt->dirname;
- foreach (qw(objects/info refs/heads)) {
- make_path("$dir/.git/$_") or die "make_path $_: $!";
+ foreach ('', qw(objects refs objects/info refs/heads)) {
+ mkdir("$dir/.git/$_") or die "mkdir $_: $!";
}
open my $fh, '>', "$dir/.git/config" or die "open .git/config: $!";
print $fh <<'EOF' or die "print .git/config $!";
@@ -174,9 +195,8 @@ EOF
my $f = '.git/objects/info/alternates';
open $fh, '>', "$dir/$f" or die "open: $f: $!";
- foreach my $git (@{$self->{gits}}) {
- print $fh "$git->{git_dir}/objects\n" or die "print $f: $!";
- }
+ print($fh (map { "$_->{git_dir}/objects\n" } @{$self->{gits}})) or
+ die "print $f: $!";
close $fh or die "close: $f: $!";
$wt;
}
@@ -195,8 +215,8 @@ sub reap ($$) {
$? == 0 or die "$msg failed: $?";
}
-sub prepare_wt ($$$) {
- my ($wt_dir, $existing, $di) = @_;
+sub prepare_wt ($$$$) {
+ my ($out, $wt_dir, $existing, $di) = @_;
my $oid_full = $existing->[1];
my ($r, $w);
my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
@@ -208,17 +228,21 @@ sub prepare_wt ($$$) {
my $pid = spawn([@git, qw(update-index -z --index-info)], {}, $rdr);
close $r or die "close pipe(r): $!";
print $w "$mode_a $oid_full\t$path_a\0" or die "print update-index: $!";
+
close $w or die "close update-index: $!";
reap($pid, 'update-index -z --index-info');
$pid = spawn([@git, qw(checkout-index -a -f -u)]);
reap($pid, 'checkout-index -a -f -u');
+
+ print $out "Working tree prepared:\n",
+ "$mode_a $oid_full\t", git_quote($path_a), "\n";
}
sub do_apply ($$$$) {
my ($out, $wt_git, $wt_dir, $di) = @_;
- my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_info($di);
+ my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_url($di);
$tmp->flush or die "tmp->flush failed: $!";
$out->flush or die "err->flush failed: $!";
sysseek($tmp, 0, SEEK_SET) or die "sysseek(tmp) failed: $!";
@@ -257,7 +281,7 @@ sub di_url ($) {
# can have different HTTP_HOST on the same instance.
my $url = $di->{ibx}->base_url;
my $mid = $di->{smsg}->{mid};
- defined($url) ? "<$url/$mid/>" : "<$mid>";
+ defined($url) ? "<$url$mid/>" : "<$mid>";
}
sub apply_patches ($$$$$) {
@@ -275,7 +299,7 @@ sub apply_patches ($$$$$) {
my $existing = $found->{$oid_a};
my $empty_oid = $oid_a =~ /\A0+\z/;
- if ($empty_oid && $i != 0) {
+ if ($empty_oid && $i != 1) {
die "empty oid at [$i/$tot] ", di_url($di);
}
if (!$existing && !$empty_oid) {
@@ -284,13 +308,13 @@ sub apply_patches ($$$$$) {
# prepare the worktree for patch application:
if ($i == 1 && $existing) {
- prepare_wt($wt_dir, $existing, $di);
+ prepare_wt($out, $wt_dir, $existing, $di);
}
- unless (-f "$wt_dir/$di->{path_a}") {
+ if (!$empty_oid && ! -f "$wt_dir/$di->{path_a}") {
die "missing $di->{path_a} at [$i/$tot] ", di_url($di);
}
- print $out "applying [$i/$tot] ", di_url($di), "\n",
+ print $out "\napplying [$i/$tot] ", di_url($di), "\n",
join('', @{$di->{hdr_lines}}), "\n"
or die "print \$out failed: $!";
@@ -302,8 +326,8 @@ sub apply_patches ($$$$$) {
sub dump_found ($$) {
my ($out, $found) = @_;
foreach my $oid (sort keys %$found) {
- my ($git, $oid, $di) = @{$found->{$oid}};
- my $loc = $di ? di_info($di) : $git->src_blob_url($oid);
+ my ($git, $oid, undef, undef, $di) = @{$found->{$oid}};
+ my $loc = $di ? di_url($di) : $git->src_blob_url($oid);
print $out "$oid from $loc\n";
}
}
@@ -330,7 +354,7 @@ sub solve ($$$$) {
my $req = { %$hints, oid_b => $oid_b };
my @todo = ($req);
- my $found = {}; # { oid_abbrev => [ PublicInbox::Git, oid_full, $di ] }
+ my $found = {}; # { abbrev => [ ::Git, oid_full, type, size, $di ] }
my $patches = []; # [ array of $di hashes ]
my $max = $self->{max_steps} || 200;
@@ -338,9 +362,14 @@ sub solve ($$$$) {
while (defined(my $want = pop @todo)) {
# see if we can find the blob in an existing git repo:
- if (my $existing = solve_existing($self, $want)) {
+ if (my $existing = solve_existing($self, $out, $want)) {
my $want_oid = $want->{oid_b};
- return $existing if $want_oid eq $oid_b; # DONE!
+ if ($want_oid eq $oid_b) { # DONE!
+ my @pub_urls = $existing->[0]->pub_urls;
+ print $out "found $want_oid in ",
+ join("\n", @pub_urls),"\n";
+ return $existing;
+ }
$found->{$want_oid} = $existing;
next; # ok, one blob resolved, more to go?
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 09/37] view: wire up diff and vcs viewers with solver
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (7 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 08/37] solver: various bugfixes and cleanups Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 10/37] git: disable abbreviations with cat-file hints Eric Wong
` (27 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
---
MANIFEST | 2 +
lib/PublicInbox/Config.pm | 59 ++++++++++++++-
lib/PublicInbox/View.pm | 47 +++++++++---
lib/PublicInbox/ViewDiff.pm | 147 ++++++++++++++++++++++++++++++++++++
lib/PublicInbox/ViewVCS.pm | 87 +++++++++++++++++++++
lib/PublicInbox/WWW.pm | 18 ++++-
6 files changed, 345 insertions(+), 15 deletions(-)
create mode 100644 lib/PublicInbox/ViewDiff.pm
create mode 100644 lib/PublicInbox/ViewVCS.pm
diff --git a/MANIFEST b/MANIFEST
index 95ad0c6..5e980fe 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -109,6 +109,8 @@ lib/PublicInbox/SpawnPP.pm
lib/PublicInbox/Unsubscribe.pm
lib/PublicInbox/V2Writable.pm
lib/PublicInbox/View.pm
+lib/PublicInbox/ViewDiff.pm
+lib/PublicInbox/ViewVCS.pm
lib/PublicInbox/WWW.pm
lib/PublicInbox/WWW.pod
lib/PublicInbox/WatchMaildir.pm
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index bea2617..355e64b 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -2,12 +2,19 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# Used throughout the project for reading configuration
+#
+# Note: I hate camelCase, but git-config(1) uses it and it's better
+# than alllowercasewithoutunderscores, so use lc('configKey') where
+# applicable for readability
+
package PublicInbox::Config;
use strict;
use warnings;
require PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
+sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
+
# returns key-value pairs of config directives in a hash
# if keys may be multi-value, the value is an array ref containing all values
sub new {
@@ -22,6 +29,7 @@ sub new {
$self->{-by_newsgroup} ||= {};
$self->{-no_obfuscate} ||= {};
$self->{-limiters} ||= {};
+ $self->{-code_repos} ||= {}; # nick => PublicInbox::Git object
if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
$no = [ $no ] if ref($no) ne 'ARRAY';
@@ -169,6 +177,41 @@ sub valid_inbox_name ($) {
1;
}
+# parse a code repo
+# Only git is supported at the moment, but SVN and Hg are possibilities
+sub _fill_code_repo {
+ my ($self, $nick) = @_;
+ my $pfx = "coderepo.$nick";
+
+ my $dir = $self->{"$pfx.dir"}; # aka "GIT_DIR"
+ unless (defined $dir) {
+ warn "$pfx.repodir unset";
+ return;
+ }
+
+ my $git = PublicInbox::Git->new($dir);
+ foreach my $t (qw(blob commit tree tag)) {
+ $git->{$t.'_url_format'} =
+ _array($self->{lc("$pfx.${t}UrlFormat")});
+ }
+
+ if (my $cgits = $self->{lc("$pfx.cgitUrl")}) {
+ $git->{cgit_url} = $cgits = _array($cgits);
+
+ # cgit supports "/blob/?id=%s", but it's only a plain-text
+ # display and requires an unabbreviated id=
+ foreach my $t (qw(blob commit tag)) {
+ $git->{$t.'_url_format'} ||= map {
+ "$_/$t/?id=%s"
+ } @$cgits;
+ }
+ }
+ # TODO: support gitweb and other repository viewers?
+ # TODO: parse cgitrc
+
+ $git;
+}
+
sub _fill {
my ($self, $pfx) = @_;
my $rv = {};
@@ -192,9 +235,9 @@ sub _fill {
}
# TODO: more arrays, we should support multi-value for
# more things to encourage decentralization
- foreach my $k (qw(address altid nntpmirror)) {
+ foreach my $k (qw(address altid nntpmirror coderepo)) {
if (defined(my $v = $self->{"$pfx.$k"})) {
- $rv->{$k} = ref($v) eq 'ARRAY' ? $v : [ $v ];
+ $rv->{$k} = _array($v);
}
}
@@ -224,6 +267,18 @@ sub _fill {
$rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
each_inbox($self, sub {}); # noop to populate -no_obfuscate
}
+
+ if (my $ibx_code_repos = $rv->{coderepo}) {
+ my $code_repos = $self->{-code_repos};
+ my $repo_objs = $rv->{-repo_objs} = [];
+ foreach my $nick (@$ibx_code_repos) {
+ valid_inbox_name($nick) or next;
+ my $repo = $code_repos->{$nick} ||=
+ _fill_code_repo($self, $nick);
+ push @$repo_objs, $repo if $repo;
+ }
+ }
+
$rv
}
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 470e3ab..0187ec3 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -14,6 +14,7 @@ use PublicInbox::MsgIter;
use PublicInbox::Address;
use PublicInbox::WwwStream;
use PublicInbox::Reply;
+use PublicInbox::ViewDiff qw(flush_diff);
require POSIX;
use Time::Local qw(timegm);
@@ -28,7 +29,7 @@ sub msg_html {
my ($ctx, $mime, $more, $smsg) = @_;
my $hdr = $mime->header_obj;
my $ibx = $ctx->{-inbox};
- my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
+ $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
my $tip = _msg_html_prepare($hdr, $ctx, $more, 0);
my $end = 2;
PublicInbox::WwwStream->response($ctx, 200, sub {
@@ -36,7 +37,7 @@ sub msg_html {
if ($nr == 1) {
# $more cannot be true w/o $smsg being defined:
my $upfx = $more ? '../'.mid_escape($smsg->mid).'/' : '';
- $tip . multipart_text_as_html($mime, $upfx, $obfs_ibx) .
+ $tip . multipart_text_as_html($mime, $upfx, $ibx) .
'</pre><hr>'
} elsif ($more && @$more) {
++$end;
@@ -81,15 +82,15 @@ sub msg_html_more {
my $str = eval {
my ($id, $prev, $smsg) = @$more;
my $mid = $ctx->{mid};
- $smsg = $ctx->{-inbox}->smsg_mime($smsg);
+ my $ibx = $ctx->{-inbox};
+ $smsg = $ibx->smsg_mime($smsg);
my $next = $ctx->{srch}->next_by_mid($mid, \$id, \$prev);
@$more = $next ? ($id, $prev, $next) : ();
if ($smsg) {
my $mime = $smsg->{mime};
my $upfx = '../' . mid_escape($smsg->mid) . '/';
_msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
- multipart_text_as_html($mime, $upfx,
- $ctx->{-obfs_ibx}) .
+ multipart_text_as_html($mime, $upfx, $ibx) .
'</pre><hr>'
} else {
'';
@@ -260,7 +261,8 @@ sub index_entry {
$rv .= "\n";
# scan through all parts, looking for displayable text
- msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
+ my $ibx = $ctx->{-inbox};
+ msg_iter($mime, sub { $rv .= add_text_body($mhref, $ibx, $_[0]) });
# add the footer
$rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -488,11 +490,11 @@ sub thread_html {
}
sub multipart_text_as_html {
- my ($mime, $upfx, $obfs_ibx) = @_;
+ my ($mime, $upfx, $ibx) = @_;
my $rv = "";
# scan through all parts, looking for displayable text
- msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) });
+ msg_iter($mime, sub { $rv .= add_text_body($upfx, $ibx, $_[0]) });
$rv;
}
@@ -545,7 +547,8 @@ sub attach_link ($$$$;$) {
}
sub add_text_body {
- my ($upfx, $obfs_ibx, $p) = @_;
+ my ($upfx, $ibx, $p) = @_;
+ my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
# $p - from msg_iter: [ Email::MIME, depth, @idx ]
my ($part, $depth) = @$p; # attachment @idx is unused
my $ct = $part->content_type || 'text/plain';
@@ -554,6 +557,19 @@ sub add_text_body {
return attach_link($upfx, $ct, $p, $fn) unless defined $s;
+ my ($diff, $spfx);
+ if ($ibx->{-repo_objs} && $s =~ /^(?:diff|---|\+{3}) /ms) {
+ $diff = [];
+ my $n_slash = $upfx =~ tr!/!/!;
+ if ($n_slash == 0) {
+ $spfx = '../';
+ } elsif ($n_slash == 1) {
+ $spfx = '';
+ } else { # nslash == 2
+ $spfx = '../../';
+ }
+ };
+
my @lines = split(/^/m, $s);
$s = '';
if (defined($fn) || $depth > 0 || $err) {
@@ -568,19 +584,26 @@ sub add_text_body {
# show the previously buffered quote inline
flush_quote(\$s, $l, \@quot) if @quot;
- # regular line, OK
- $l->linkify_1($cur);
- $s .= $l->linkify_2(ascii_html($cur));
+ if ($diff) {
+ push @$diff, $cur;
+ } else {
+ # regular line, OK
+ $l->linkify_1($cur);
+ $s .= $l->linkify_2(ascii_html($cur));
+ }
} else {
+ flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
push @quot, $cur;
}
}
if (@quot) { # ugh, top posted
flush_quote(\$s, $l, \@quot);
+ flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
$s;
} else {
+ flush_diff(\$s, $spfx, $l, $diff) if $diff && @$diff;
obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
if ($s =~ /\n\z/s) { # common, last line ends with a newline
$s;
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
new file mode 100644
index 0000000..ee450fa
--- /dev/null
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -0,0 +1,147 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# used by PublicInbox::View
+package PublicInbox::ViewDiff;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw(flush_diff);
+
+use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Git qw(git_unquote);
+
+sub DSTATE_INIT () { 0 }
+sub DSTATE_STAT () { 1 } # TODO
+sub DSTATE_HEAD () { 2 } # /^diff --git /, /^index /, /^--- /, /^\+\+\+ /
+sub DSTATE_HUNK () { 3 } # /^@@ /
+sub DSTATE_CTX () { 4 } # /^ /
+sub DSTATE_ADD () { 5 } # /^\+/
+sub DSTATE_DEL () { 6 } # /^\-/
+
+my $OID_NULL = '0{7,40}';
+my $OID_BLOB = '[a-f0-9]{7,40}';
+my $PATH_A = '"?a/.+|/dev/null';
+my $PATH_B = '"?b/.+|/dev/null';
+
+sub to_html ($$) {
+ $_[0]->linkify_1($_[1]);
+ $_[0]->linkify_2(ascii_html($_[1]));
+}
+
+# link to line numbers in blobs
+sub diff_hunk ($$$$) {
+ my ($dctx, $spfx, $ca, $cb) = @_;
+ my $oid_a = $dctx->{oid_a};
+ my $oid_b = $dctx->{oid_b};
+
+ (defined($oid_a) && defined($oid_b)) or return "@@ $ca $cb @@";
+
+ my ($n) = ($ca =~ /^-(\d+)/);
+ $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+ my $rv = qq(@@ <a\nhref=$spfx$oid_a/s$n>$ca</a>);
+
+ ($n) = ($cb =~ /^\+(\d+)/);
+ $n = defined($n) ? do { ++$n; "#n$n" } : '';
+
+ $rv .= qq( <a\nhref=$spfx$oid_b/s$n>$cb</a> @@);
+}
+
+sub flush_diff ($$$$) {
+ my ($dst, $spfx, $linkify, $diff) = @_;
+ my $state = DSTATE_INIT;
+ my $dctx; # {}, keys: oid_a, oid_b, path_a, path_b
+
+ foreach my $s (@$diff) {
+ if ($s =~ /^ /) {
+ if ($state == DSTATE_HUNK || $state == DSTATE_ADD ||
+ $state == DSTATE_DEL || $state == DSTATE_HEAD) {
+ $$dst .= "</span><span\nclass=ctx>";
+ $state = DSTATE_CTX;
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^-- $/) { # email signature begins
+ if ($state != DSTATE_INIT) {
+ $state = DSTATE_INIT;
+ $$dst .= '</span>';
+ }
+ $$dst .= $s;
+ } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!x) {
+ if ($state != DSTATE_HEAD) {
+ my ($pa, $pb) = ($1, $2);
+ $$dst .= '</span>' if $state != DSTATE_INIT;
+ $$dst .= "<span\nclass=head>";
+ $state = DSTATE_HEAD;
+ $pa = (split('/', git_unquote($pa), 2))[1];
+ $pb = (split('/', git_unquote($pb), 2))[1];
+ $dctx = { path_a => $pa, path_b => $pb };
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
+ $$dst .= qq($1<a\nhref=$spfx$2/s>$2</a>);
+ $$dst .= to_html($linkify, $s) ;
+ } elsif ($s =~ s/^index ($OID_NULL)(\.\.$OID_BLOB)\b//o) {
+ $$dst .= 'index ';
+ $$dst .= qq(<a\nhref=$spfx$1/s>$1</a>$2);
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
+ $dctx->{oid_a} = $1;
+ $dctx->{oid_b} = $2;
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
+ my ($ca, $cb) = ($1, $2);
+ if ($state == DSTATE_HEAD || $state == DSTATE_CTX ||
+ $state == DSTATE_ADD || $state == DSTATE_DEL) {
+ $$dst .= "</span><span\nclass=hunk>";
+ $state = DSTATE_HUNK;
+ $$dst .= diff_hunk($dctx, $spfx, $ca, $cb);
+ } else {
+ $$dst .= to_html($linkify, "@@ $ca $cb @@");
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ m!^--- $PATH_A!) {
+ if ($state == DSTATE_INIT) { # color only (no oid link)
+ $state = DSTATE_HEAD;
+ $$dst .= "<span\nclass=head>";
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ m!^\+{3} $PATH_B!) {
+ if ($state == DSTATE_INIT) { # color only (no oid link)
+ $state = DSTATE_HEAD;
+ $$dst .= "<span\nclass=head>";
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^\+/) {
+ if ($state != DSTATE_ADD && $state != DSTATE_INIT) {
+ $$dst .= "</span><span\nclass=add>";
+ $state = DSTATE_ADD;
+ }
+ $$dst .= to_html($linkify, $s);
+ } elsif ($s =~ /^-/) {
+ if ($state != DSTATE_DEL && $state != DSTATE_INIT) {
+ $$dst .= "</span><span\nclass=del>";
+ $state = DSTATE_DEL;
+ }
+ $$dst .= to_html($linkify, $s);
+ # ignore the following lines in headers:
+ } elsif ($s =~ /^(?:dis)similarity index/ ||
+ $s =~ /^(?:old|new) mode/ ||
+ $s =~ /^(?:deleted|new) file mode/ ||
+ $s =~ /^(?:copy|rename) (?:from|to) / ||
+ $s =~ /^(?:dis)?similarity index /) {
+ $$dst .= to_html($linkify, $s);
+ } else {
+ if ($state != DSTATE_INIT) {
+ $$dst .= '</span>';
+ $state = DSTATE_INIT;
+ }
+ $$dst .= to_html($linkify, $s);
+ }
+ }
+ @$diff = ();
+ $$dst .= '</span>' if $state != DSTATE_INIT;
+ undef;
+}
+
+1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
new file mode 100644
index 0000000..49fb1c5
--- /dev/null
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -0,0 +1,87 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# show any VCS object, similar to "git show"
+package PublicInbox::ViewVCS;
+use strict;
+use warnings;
+use Encode qw(find_encoding);
+use PublicInbox::SolverGit;
+use PublicInbox::WwwStream;
+use PublicInbox::Linkify;
+use PublicInbox::Hval qw(ascii_html);
+my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
+my $max_size = 1024 * 1024; # TODO: configurable
+my $enc_utf8 = find_encoding('UTF-8');
+
+sub html_page ($$$) {
+ my ($ctx, $code, $strref) = @_;
+ $ctx->{-upfx} = '../'; # from "/$INBOX/$OID/s"
+ PublicInbox::WwwStream->response($ctx, $code, sub {
+ my ($nr, undef) = @_;
+ $nr == 1 ? $$strref : undef;
+ });
+}
+
+sub show ($$;$) {
+ my ($ctx, $oid_b, $fn) = @_;
+ my $ibx = $ctx->{-inbox};
+ my $inboxes = [ $ibx ];
+ my $solver = PublicInbox::SolverGit->new($ibx->{-repo_objs}, $inboxes);
+ my $qp = $ctx->{qp};
+ my $hints = {};
+ while (my ($from, $to) = each %QP_MAP) {
+ defined(my $v = $qp->{$from}) or next;
+ $hints->{$to} = $v;
+ }
+
+ open my $log, '+>', undef or die "open: $!";
+ my $res = $solver->solve($log, $oid_b, $hints);
+
+ seek($log, 0, 0) or die "seek: $!";
+ $log = do { local $/; <$log> };
+
+ my $l = PublicInbox::Linkify->new;
+ $l->linkify_1($log);
+ $log = '<pre>debug log:</pre><hr /><pre>' .
+ $l->linkify_2(ascii_html($log)) . '</pre>';
+
+ $res or return html_page($ctx, 404, \$log);
+
+ my ($git, $oid, $type, $size, $di) = @$res;
+ if ($size > $max_size) {
+ # TODO: stream the raw file if it's gigantic, at least
+ $log = '<pre><b>Too big to show</b></pre>' . $log;
+ return html_page($ctx, 500, \$log);
+ }
+
+ my $blob = $git->cat_file($oid);
+ if (!$blob) { # WTF?
+ my $e = "Failed to retrieve generated blob ($oid)";
+ $ctx->{env}->{'psgi.errors'}->print("$e ($git->{git_dir})\n");
+ $log = "<pre><b>$e</b></pre>" . $log;
+ return html_page($ctx, 500, \$log);
+ }
+
+ if (index($$blob, "\0") >= 0) {
+ $log = "<pre>$oid $type $size bytes (binary)</pre>" . $log;
+ return html_page($ctx, 200, \$log);
+ }
+
+ $$blob = $enc_utf8->decode($$blob);
+ my $nl = ($$blob =~ tr/\n/\n/);
+ my $pad = length($nl);
+
+ # using some of the same CSS class names and ids as cgit
+ $log = "<pre>$oid $type $size bytes</pre><hr /><table\nclass=blob>".
+ "<tr><td\nclass=linenumbers><pre>" . join('', map {
+ sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
+ } (1..$nl)) . '</pre></td>' .
+ '<td><pre> </pre></td>'. # pad for non-CSS users
+ "<td\nclass=lines><pre><code>" . ascii_html($$blob) .
+ '</pre></td></tr></table>' . $log;
+
+ html_page($ctx, 200, \$log);
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 3562e46..c73370f 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -25,6 +25,7 @@ our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
our $MID_RE = qr!([^/]+)!;
our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
our $ATTACH_RE = qr!(\d[\.\d]*)-([[:alnum:]][\w\.-]+[[:alnum:]])!i;
+our $OID_RE = qr![a-f0-9]{7,40}!;
sub new {
my ($class, $pi_config) = @_;
@@ -117,7 +118,10 @@ sub call {
r301($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
get_text($ctx, $1, $2);
-
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
+ get_vcs_object($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/_([\w\.\-]+)\z!o) {
+ get_vcs_object($ctx, $1, $2, $3);
# convenience redirects order matters
} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
r301($ctx, $1, $2);
@@ -259,6 +263,18 @@ sub get_text {
PublicInbox::WwwText::get_text($ctx, $key);
}
+# show git objects (blobs and commits)
+# /$INBOX/$OBJECT_ID/s
+# /$INBOX/$OBJECT_ID/_$FILENAME
+# FILENAME may only contain word characters, dots and dashes (no slashes)
+sub get_vcs_object ($$$;$) {
+ my ($ctx, $inbox, $oid, $filename) = @_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+ require PublicInbox::ViewVCS;
+ PublicInbox::ViewVCS::show($ctx, $oid, $filename);
+}
+
sub ctx_get {
my ($ctx, $key) = @_;
my $val = $ctx->{$key};
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 10/37] git: disable abbreviations with cat-file hints
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (8 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 09/37] view: wire up diff and vcs viewers with solver Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 11/37] solver: operate directly on git index Eric Wong
` (26 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Ambiguity is not worth it for internal usage with the
solver.
---
lib/PublicInbox/Git.pm | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a61cb31..b655921 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -92,7 +92,8 @@ sub _bidi_pipe {
fcntl($in_w, 1031, 4096) if $batch eq '--batch-check';
}
- my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
+ my @cmd = (qw(git), "--git-dir=$self->{git_dir}",
+ qw(-c core.abbrev=40 cat-file), $batch);
my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
if ($err) {
open(my $fh, '+>', undef) or fail($self, "open.err failed: $!");
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 11/37] solver: operate directly on git index
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (9 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 10/37] git: disable abbreviations with cat-file hints Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 12/37] view: enable naming hints for raw blob downloads Eric Wong
` (25 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
No need to incur extra I/O traffic with a working-tree and
uncompressed files on the filesystem. git can handle patch
application in memory and we rely on exact blob matching
anyways, so no need for 3way patch application.
---
lib/PublicInbox/SolverGit.pm | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index d7209e6..8dfc52e 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -215,7 +215,7 @@ sub reap ($$) {
$? == 0 or die "$msg failed: $?";
}
-sub prepare_wt ($$$$) {
+sub prepare_index ($$$$) {
my ($out, $wt_dir, $existing, $di) = @_;
my $oid_full = $existing->[1];
my ($r, $w);
@@ -232,10 +232,7 @@ sub prepare_wt ($$$$) {
close $w or die "close update-index: $!";
reap($pid, 'update-index -z --index-info');
- $pid = spawn([@git, qw(checkout-index -a -f -u)]);
- reap($pid, 'checkout-index -a -f -u');
-
- print $out "Working tree prepared:\n",
+ print $out "index prepared:\n",
"$mode_a $oid_full\t", git_quote($path_a), "\n";
}
@@ -250,7 +247,7 @@ sub do_apply ($$$$) {
defined(my $err_fd = fileno($out)) or die "fileno(out): $!";
my $rdr = { 0 => fileno($tmp), 1 => $err_fd, 2 => $err_fd };
my $cmd = [ qw(git -C), $wt_dir,
- qw(apply --whitespace=warn -3 --verbose) ];
+ qw(apply --cached --whitespace=warn --verbose) ];
reap(spawn($cmd, undef, $rdr), 'apply');
local $/ = "\0";
@@ -267,11 +264,12 @@ sub do_apply ($$$$) {
$file eq $di->{path_b} or
die "index mismatch: file=$file != path_b=$di->{path_b}";
- my $abs_path = "$wt_dir/$file";
- -r $abs_path or die "WT_DIR/$file not readable";
- my $size = -s _;
- print $out "OK $mode_b $oid_b_full $stage\t$file\n";
+ my (undef, undef, $size) = $wt_git->check($oid_b_full);
+
+ defined($size) or die "failed to read_size from $oid_b_full";
+
+ print $out "$mode_b $oid_b_full\t$file\n";
[ $wt_git, $oid_b_full, 'blob', $size, $di ];
}
@@ -308,10 +306,7 @@ sub apply_patches ($$$$$) {
# prepare the worktree for patch application:
if ($i == 1 && $existing) {
- prepare_wt($out, $wt_dir, $existing, $di);
- }
- if (!$empty_oid && ! -f "$wt_dir/$di->{path_a}") {
- die "missing $di->{path_a} at [$i/$tot] ", di_url($di);
+ prepare_index($out, $wt_dir, $existing, $di);
}
print $out "\napplying [$i/$tot] ", di_url($di), "\n",
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 12/37] view: enable naming hints for raw blob downloads
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (10 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 11/37] solver: operate directly on git index Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 13/37] git: support 'ambiguous' result from --batch-check Eric Wong
` (24 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Meaningful names in URLs are nice, and they can make
life easier for supporting syntax-highlighting.
---
lib/PublicInbox/ViewDiff.pm | 27 +++++++++++++++++++--------
lib/PublicInbox/ViewVCS.pm | 32 +++++++++++++++++++++++++++-----
2 files changed, 46 insertions(+), 13 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index ee450fa..94f015f 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -2,12 +2,16 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
#
# used by PublicInbox::View
+# This adds CSS spans for diff highlighting.
+# It also generates links for ViewVCS + SolverGit to show
+# (or reconstruct) blobs.
+
package PublicInbox::ViewDiff;
use strict;
use warnings;
use base qw(Exporter);
our @EXPORT_OK = qw(flush_diff);
-
+use URI::Escape qw(uri_escape_utf8);
use PublicInbox::Hval qw(ascii_html);
use PublicInbox::Git qw(git_unquote);
@@ -18,6 +22,7 @@ sub DSTATE_HUNK () { 3 } # /^@@ /
sub DSTATE_CTX () { 4 } # /^ /
sub DSTATE_ADD () { 5 } # /^\+/
sub DSTATE_DEL () { 6 } # /^\-/
+sub UNSAFE () { "^A-Za-z0-9\-\._~/" }
my $OID_NULL = '0{7,40}';
my $OID_BLOB = '[a-f0-9]{7,40}';
@@ -40,18 +45,18 @@ sub diff_hunk ($$$$) {
my ($n) = ($ca =~ /^-(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
- my $rv = qq(@@ <a\nhref=$spfx$oid_a/s$n>$ca</a>);
+ my $rv = qq(@@ <a\nhref=$spfx$oid_a/s$dctx->{Q}$n>$ca</a>);
($n) = ($cb =~ /^\+(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
- $rv .= qq( <a\nhref=$spfx$oid_b/s$n>$cb</a> @@);
+ $rv .= qq( <a\nhref=$spfx$oid_b/s$dctx->{Q}$n>$cb</a> @@);
}
sub flush_diff ($$$$) {
my ($dst, $spfx, $linkify, $diff) = @_;
my $state = DSTATE_INIT;
- my $dctx; # {}, keys: oid_a, oid_b, path_a, path_b
+ my $dctx = { Q => '' }; # {}, keys: oid_a, oid_b, path_a, path_b
foreach my $s (@$diff) {
if ($s =~ /^ /) {
@@ -67,7 +72,7 @@ sub flush_diff ($$$$) {
$$dst .= '</span>';
}
$$dst .= $s;
- } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!x) {
+ } elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!) {
if ($state != DSTATE_HEAD) {
my ($pa, $pb) = ($1, $2);
$$dst .= '</span>' if $state != DSTATE_INIT;
@@ -75,15 +80,21 @@ sub flush_diff ($$$$) {
$state = DSTATE_HEAD;
$pa = (split('/', git_unquote($pa), 2))[1];
$pb = (split('/', git_unquote($pb), 2))[1];
- $dctx = { path_a => $pa, path_b => $pb };
+ $dctx = {
+ Q => "?b=".uri_escape_utf8($pb, UNSAFE),
+ };
+ if ($pa ne $pb) {
+ $dctx->{Q} .=
+ "&a=".uri_escape_utf8($pa, UNSAFE);
+ }
}
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
- $$dst .= qq($1<a\nhref=$spfx$2/s>$2</a>);
+ $$dst .= qq($1<a\nhref=$spfx$2/s$dctx->{Q}>$2</a>);
$$dst .= to_html($linkify, $s) ;
} elsif ($s =~ s/^index ($OID_NULL)(\.\.$OID_BLOB)\b//o) {
$$dst .= 'index ';
- $$dst .= qq(<a\nhref=$spfx$1/s>$1</a>$2);
+ $$dst .= qq(<a\nhref=$spfx$1/s$dctx->{Q}>$1</a>$2);
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
$dctx->{oid_a} = $1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 49fb1c5..90c0907 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -2,6 +2,17 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# show any VCS object, similar to "git show"
+# FIXME: we only show blobs for now
+#
+# This can use a "solver" to reconstruct blobs based on git
+# patches (with abbreviated OIDs in the header). However, the
+# abbreviated OIDs must match exactly what's in the original
+# email (unless a normal code repo already has the blob).
+#
+# In other words, we can only reliably reconstruct blobs based
+# on links generated by ViewDiff (and only if the emailed
+# patches apply 100% cleanly to published blobs).
+
package PublicInbox::ViewVCS;
use strict;
use warnings;
@@ -9,7 +20,7 @@ use Encode qw(find_encoding);
use PublicInbox::SolverGit;
use PublicInbox::WwwStream;
use PublicInbox::Linkify;
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html to_filename);
my %QP_MAP = ( A => 'oid_a', B => 'oid_b', a => 'path_a', b => 'path_b' );
my $max_size = 1024 * 1024; # TODO: configurable
my $enc_utf8 = find_encoding('UTF-8');
@@ -63,8 +74,18 @@ sub show ($$;$) {
return html_page($ctx, 500, \$log);
}
- if (index($$blob, "\0") >= 0) {
- $log = "<pre>$oid $type $size bytes (binary)</pre>" . $log;
+ my $binary = index($$blob, "\0") >= 0;
+ if ($fn) {
+ my $h = [ 'Content-Length', $size, 'Content-Type' ];
+ push(@$h, ($binary ? 'application/octet-stream' : 'text/plain'));
+ return [ 200, $h, [ $$blob ]];
+ }
+
+ my $path = to_filename($di->{path_b} || $hints->{path_b} || 'blob');
+ my $raw_link = "(<a\nhref=_$path>raw</a>)";
+ if ($binary) {
+ $log = "<pre>$oid $type $size bytes (binary)" .
+ " $raw_link</pre>" . $log;
return html_page($ctx, 200, \$log);
}
@@ -73,13 +94,14 @@ sub show ($$;$) {
my $pad = length($nl);
# using some of the same CSS class names and ids as cgit
- $log = "<pre>$oid $type $size bytes</pre><hr /><table\nclass=blob>".
+ $log = "<pre>$oid $type $size bytes $raw_link</pre>" .
+ "<hr /><table\nclass=blob>".
"<tr><td\nclass=linenumbers><pre>" . join('', map {
sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
} (1..$nl)) . '</pre></td>' .
'<td><pre> </pre></td>'. # pad for non-CSS users
"<td\nclass=lines><pre><code>" . ascii_html($$blob) .
- '</pre></td></tr></table>' . $log;
+ '</code></pre></td></tr></table>' . $log;
html_page($ctx, 200, \$log);
}
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 13/37] git: support 'ambiguous' result from --batch-check
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (11 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 12/37] view: enable naming hints for raw blob downloads Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 14/37] solver: more verbose blob resolution Eric Wong
` (23 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
David Turner's patch to return "ambiguous" seems like a reasonable
patch for future versions of git:
https://public-inbox.org/git/672a6fb9e480becbfcb5df23ae37193784811b6b.camel@novalis.org/
---
lib/PublicInbox/Git.pm | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index b655921..a0b934a 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -179,11 +179,13 @@ sub check {
local $/ = "\n";
chomp(my $line = $self->{in_c}->getline);
my ($hex, $type, $size) = split(' ', $line);
- return if $type eq 'missing';
- # "dead" in git.git shows "dangling 4\ndead\n", not sure why
- # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/
- # so handle the oddball stuff just in case
+ # Future versions of git.git may show 'ambiguous', but for now,
+ # we must handle 'dangling' below (and maybe some other oddball
+ # stuff):
+ # https://public-inbox.org/git/20190118033845.s2vlrb3wd3m2jfzu@dcvr/T/
+ return if $type eq 'missing' || $type eq 'ambiguous';
+
if ($hex eq 'dangling' || $hex eq 'notdir' || $hex eq 'loop') {
$size = $type + length("\n");
my $r = read($self->{in_c}, my $buf, $size);
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 14/37] solver: more verbose blob resolution
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (12 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 13/37] git: support 'ambiguous' result from --batch-check Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 15/37] solver: break up patch application steps Eric Wong
` (22 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Help users find out where each step of the resolution came from.
Also, we must cleanly abort the process if we have missing blobs.
And refine the output to avoid unnecessary braces, too.
---
lib/PublicInbox/SolverGit.pm | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 8dfc52e..29cfd21 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -57,7 +57,7 @@ sub solve_existing ($$$) {
scalar(@ambiguous) ? \@ambiguous : undef;
}
-# returns a hashref with information about a diff:
+# returns a hashref with information about a diff ($di):
# {
# oid_a => abbreviated pre-image oid,
# oid_b => abbreviated post-image oid,
@@ -279,7 +279,7 @@ sub di_url ($) {
# can have different HTTP_HOST on the same instance.
my $url = $di->{ibx}->base_url;
my $mid = $di->{smsg}->{mid};
- defined($url) ? "<$url$mid/>" : "<$mid>";
+ defined($url) ? "$url$mid/" : "<$mid>";
}
sub apply_patches ($$$$$) {
@@ -338,7 +338,7 @@ sub dump_patches ($$) {
}
# recreate $oid_b
-# Returns a 2-element array ref: [ PublicInbox::Git object, oid_full ]
+# Returns an array ref: [ ::Git object, oid_full, type, size, di ]
# or undef if nothing was found.
sub solve ($$$$) {
my ($self, $out, $oid_b, $hints) = @_;
@@ -357,14 +357,12 @@ sub solve ($$$$) {
while (defined(my $want = pop @todo)) {
# see if we can find the blob in an existing git repo:
+ my $want_oid = $want->{oid_b};
if (my $existing = solve_existing($self, $out, $want)) {
- my $want_oid = $want->{oid_b};
- if ($want_oid eq $oid_b) { # DONE!
- my @pub_urls = $existing->[0]->pub_urls;
- print $out "found $want_oid in ",
- join("\n", @pub_urls),"\n";
- return $existing;
- }
+ print $out "found $want_oid in ",
+ join("\n", $existing->[0]->pub_urls), "\n";
+
+ return $existing if $want_oid eq $oid_b; # DONE!
$found->{$want_oid} = $existing;
next; # ok, one blob resolved, more to go?
@@ -372,10 +370,12 @@ sub solve ($$$$) {
# scan through inboxes to look for emails which results in
# the oid we want:
+ my $di;
foreach my $ibx (@{$self->{inboxes}}) {
- my $di = find_extract_diff($self, $ibx, $want) or next;
+ $di = find_extract_diff($self, $ibx, $want) or next;
unshift @$patches, $di;
+ print $out "found $want_oid in ",di_url($di),"\n";
# good, we can find a path to the oid we $want, now
# lets see if we need to apply more patches:
@@ -397,6 +397,10 @@ sub solve ($$$$) {
}
last; # onto the next @todo item
}
+ unless ($di) {
+ print $out "$want_oid could not be found\n";
+ return;
+ }
}
unless (scalar(@$patches)) {
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 15/37] solver: break up patch application steps
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (13 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 14/37] solver: more verbose blob resolution Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 16/37] solver: switch patch application to use a callback Eric Wong
` (21 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
We want more fine-grained scheduling for PSGI use, as
the patch application step can take hundreds of milliseconds
on my modest hardware.
---
lib/PublicInbox/SolverGit.pm | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 29cfd21..71494e0 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -236,8 +236,8 @@ sub prepare_index ($$$$) {
"$mode_a $oid_full\t", git_quote($path_a), "\n";
}
-sub do_apply ($$$$) {
- my ($out, $wt_git, $wt_dir, $di) = @_;
+sub do_apply_begin ($$$) {
+ my ($out, $wt_dir, $di) = @_;
my $tmp = delete $di->{tmp} or die "BUG: no tmp ", di_url($di);
$tmp->flush or die "tmp->flush failed: $!";
@@ -248,11 +248,19 @@ sub do_apply ($$$$) {
my $rdr = { 0 => fileno($tmp), 1 => $err_fd, 2 => $err_fd };
my $cmd = [ qw(git -C), $wt_dir,
qw(apply --cached --whitespace=warn --verbose) ];
- reap(spawn($cmd, undef, $rdr), 'apply');
+ spawn($cmd, undef, $rdr);
+}
- local $/ = "\0";
- my $rd = popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+sub do_apply_continue ($$) {
+ my ($wt_dir, $apply_pid) = @_;
+ reap($apply_pid, 'apply');
+ popen_rd([qw(git -C), $wt_dir, qw(ls-files -s -z)]);
+}
+sub do_apply_end ($$$$) {
+ my ($out, $wt_git, $rd, $di) = @_;
+
+ local $/ = "\0";
defined(my $line = <$rd>) or die "failed to read ls-files: $!";
chomp $line or die "no trailing \\0 in [$line] from ls-files";
@@ -314,7 +322,9 @@ sub apply_patches ($$$$$) {
or die "print \$out failed: $!";
# apply the patch!
- $found->{$di->{oid_b}} = do_apply($out, $wt_git, $wt_dir, $di);
+ my $apply_pid = do_apply_begin($out, $wt_dir, $di);
+ my $rd = do_apply_continue($wt_dir, $apply_pid);
+ $found->{$di->{oid_b}} = do_apply_end($out, $wt_git, $rd, $di);
}
}
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 16/37] solver: switch patch application to use a callback
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (14 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 15/37] solver: break up patch application steps Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 17/37] solver: simplify control flow for initial loop Eric Wong
` (20 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
A bit messy at the moment, but we need to break this up
into smaller steps for fairness with other clients, as
applying dozens of patches can take several hundred
milliseconds.
---
lib/PublicInbox/SolverGit.pm | 70 ++++++++++++++++--------------------
1 file changed, 31 insertions(+), 39 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 71494e0..70d8a93 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -290,16 +290,35 @@ sub di_url ($) {
defined($url) ? "$url$mid/" : "<$mid>";
}
-sub apply_patches ($$$$$) {
- my ($self, $out, $wt, $found, $patches) = @_;
+sub apply_patches_cb ($$$$$) {
+ my ($self, $out, $found, $patches, $oid_b) = @_;
+ my $wt = do_git_init_wt($self);
my $wt_dir = $wt->dirname;
my $wt_git = PublicInbox::Git->new("$wt_dir/.git");
$wt_git->{-wt} = $wt;
my $cur = 0;
my $tot = scalar @$patches;
+ my ($apply_pid, $rd, $di);
+
+ # returns an empty string if in progress, undef if not found,
+ # or the final [ ::Git, oid_full, type, size, $di ] arrayref
+ # if found
+ sub {
+ if ($rd) {
+ $found->{$di->{oid_b}} =
+ do_apply_end($out, $wt_git, $rd, $di);
+ $rd = undef;
+ # continue to shift @$patches
+ } elsif ($apply_pid) {
+ $rd = do_apply_continue($wt_dir, $apply_pid);
+ $apply_pid = undef;
+ return ''; # $rd => do_apply_ned
+ }
+
+ # may return undef here
+ $di = shift @$patches or return $found->{$oid_b};
- foreach my $di (@$patches) {
my $i = ++$cur;
my $oid_a = $di->{oid_a};
my $existing = $found->{$oid_a};
@@ -321,29 +340,10 @@ sub apply_patches ($$$$$) {
join('', @{$di->{hdr_lines}}), "\n"
or die "print \$out failed: $!";
- # apply the patch!
- my $apply_pid = do_apply_begin($out, $wt_dir, $di);
- my $rd = do_apply_continue($wt_dir, $apply_pid);
- $found->{$di->{oid_b}} = do_apply_end($out, $wt_git, $rd, $di);
- }
-}
-
-sub dump_found ($$) {
- my ($out, $found) = @_;
- foreach my $oid (sort keys %$found) {
- my ($git, $oid, undef, undef, $di) = @{$found->{$oid}};
- my $loc = $di ? di_url($di) : $git->src_blob_url($oid);
- print $out "$oid from $loc\n";
- }
-}
-
-sub dump_patches ($$) {
- my ($out, $patches) = @_;
- my $tot = scalar(@$patches);
- my $i = 0;
- foreach my $di (@$patches) {
- ++$i;
- print $out "[$i/$tot] ", di_url($di), "\n";
+ # begin the patch application patch!
+ $apply_pid = do_apply_begin($out, $wt_dir, $di);
+ # next call to this callback will call do_apply_continue
+ '';
}
}
@@ -415,24 +415,16 @@ sub solve ($$$$) {
unless (scalar(@$patches)) {
print $out "no patch(es) for $oid_b\n";
- dump_found($out, $found);
return;
}
# reconstruct the oid_b blob using patches we found:
- eval {
- my $wt = do_git_init_wt($self);
- apply_patches($self, $out, $wt, $found, $patches);
- };
- if ($@) {
- print $out "E: $@\nfound: ";
- dump_found($out, $found);
- print $out "patches: ";
- dump_patches($out, $patches);
- return;
+ my $cb = apply_patches_cb($self, $out, $found, $patches, $oid_b);
+ my $ret;
+ while (1) {
+ $ret = $cb->();
+ return $ret if (ref($ret) || !defined($ret));
}
-
- $found->{$oid_b};
}
1;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 17/37] solver: simplify control flow for initial loop
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (15 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 16/37] solver: switch patch application to use a callback Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 18/37] solver: break @todo loop into a callback Eric Wong
` (19 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
We'll be breaking this up into several steps, too, since
searching inboxes for patch blobs can take 10s of milliseconds
for me.
---
lib/PublicInbox/SolverGit.pm | 28 ++++++++++------------------
1 file changed, 10 insertions(+), 18 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 70d8a93..beafa42 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -361,11 +361,13 @@ sub solve ($$$$) {
my @todo = ($req);
my $found = {}; # { abbrev => [ ::Git, oid_full, type, size, $di ] }
my $patches = []; # [ array of $di hashes ]
-
- my $max = $self->{max_steps} || 200;
- my $steps = 0;
+ my $max = $self->{max_patches} || 200;
while (defined(my $want = pop @todo)) {
+ if (scalar(@$patches) > $max) {
+ print $out "Aborting, too many steps to $oid_b\n";
+ return;
+ }
# see if we can find the blob in an existing git repo:
my $want_oid = $want->{oid_b};
if (my $existing = solve_existing($self, $out, $want)) {
@@ -373,9 +375,8 @@ sub solve ($$$$) {
join("\n", $existing->[0]->pub_urls), "\n";
return $existing if $want_oid eq $oid_b; # DONE!
-
$found->{$want_oid} = $existing;
- next; # ok, one blob resolved, more to go?
+ last; # ok, one blob resolved, more to go?
}
# scan through inboxes to look for emails which results in
@@ -390,21 +391,12 @@ sub solve ($$$$) {
# good, we can find a path to the oid we $want, now
# lets see if we need to apply more patches:
my $src = $di->{oid_a};
- if ($src !~ /\A0+\z/) {
- if (++$steps > $max) {
- print $out
-"Aborting, too many steps to $oid_b\n";
- return;
- }
+ last if $src =~ /\A0+\z/;
- # we have to solve it using another oid, fine:
- my $job = {
- oid_b => $src,
- path_b => $di->{path_a},
- };
- push @todo, $job;
- }
+ # we have to solve it using another oid, fine:
+ my $job = { oid_b => $src, path_b => $di->{path_a} };
+ push @todo, $job;
last; # onto the next @todo item
}
unless ($di) {
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 18/37] solver: break @todo loop into a callback
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (16 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 17/37] solver: simplify control flow for initial loop Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 19/37] solver: note the synchronous nature of index preparation Eric Wong
` (18 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
This will allow each patch search via Xapian to "yield" the
current client in favor of another client in the PSGI web
interface for fairness.
---
lib/PublicInbox/SolverGit.pm | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index beafa42..51be2cd 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -290,15 +290,21 @@ sub di_url ($) {
defined($url) ? "$url$mid/" : "<$mid>";
}
+# reconstruct the oid_b blob using patches we found:
sub apply_patches_cb ($$$$$) {
my ($self, $out, $found, $patches, $oid_b) = @_;
+
+ my $tot = scalar(@$patches) or return sub {
+ print $out "no patch(es) for $oid_b\n";
+ undef;
+ };
+
my $wt = do_git_init_wt($self);
my $wt_dir = $wt->dirname;
my $wt_git = PublicInbox::Git->new("$wt_dir/.git");
$wt_git->{-wt} = $wt;
my $cur = 0;
- my $tot = scalar @$patches;
my ($apply_pid, $rd, $di);
# returns an empty string if in progress, undef if not found,
@@ -362,8 +368,15 @@ sub solve ($$$$) {
my $found = {}; # { abbrev => [ ::Git, oid_full, type, size, $di ] }
my $patches = []; # [ array of $di hashes ]
my $max = $self->{max_patches} || 200;
+ my $apply_cb;
+ my $cb = sub {
+ my $want = pop @todo;
+ unless ($want) {
+ $apply_cb ||= apply_patches_cb($self, $out, $found,
+ $patches, $oid_b);
+ return $apply_cb->();
+ }
- while (defined(my $want = pop @todo)) {
if (scalar(@$patches) > $max) {
print $out "Aborting, too many steps to $oid_b\n";
return;
@@ -376,7 +389,7 @@ sub solve ($$$$) {
return $existing if $want_oid eq $oid_b; # DONE!
$found->{$want_oid} = $existing;
- last; # ok, one blob resolved, more to go?
+ return ''; # ok, one blob resolved, more to go?
}
# scan through inboxes to look for emails which results in
@@ -403,19 +416,13 @@ sub solve ($$$$) {
print $out "$want_oid could not be found\n";
return;
}
- }
-
- unless (scalar(@$patches)) {
- print $out "no patch(es) for $oid_b\n";
- return;
- }
+ ''; # continue onto next @todo item;
+ };
- # reconstruct the oid_b blob using patches we found:
- my $cb = apply_patches_cb($self, $out, $found, $patches, $oid_b);
- my $ret;
while (1) {
- $ret = $cb->();
+ my $ret = $cb->();
return $ret if (ref($ret) || !defined($ret));
+ # $ret == ''; so continue looping here
}
}
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 19/37] solver: note the synchronous nature of index preparation
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (17 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 18/37] solver: break @todo loop into a callback Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 20/37] solver: add a TODO note about making this fully evented Eric Wong
` (17 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
It's not likely to be worth our time to support
a callback-driven model for something which happens
once per patch series.
---
lib/PublicInbox/SolverGit.pm | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 51be2cd..42bb603 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -221,11 +221,13 @@ sub prepare_index ($$$$) {
my ($r, $w);
my $path_a = $di->{path_a} or die "BUG: path_a missing for $oid_full";
my $mode_a = $di->{mode_a} || extract_old_mode($di);
- my @git = (qw(git -C), $wt_dir);
+ # unlike git-apply(1), this only gets called once in a patch
+ # series and happens too quickly to be worth making async:
pipe($r, $w) or die "pipe: $!";
my $rdr = { 0 => fileno($r) };
- my $pid = spawn([@git, qw(update-index -z --index-info)], {}, $rdr);
+ my $pid = spawn([qw(git -C), $wt_dir,
+ qw(update-index -z --index-info)], undef, $rdr);
close $r or die "close pipe(r): $!";
print $w "$mode_a $oid_full\t$path_a\0" or die "print update-index: $!";
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 20/37] solver: add a TODO note about making this fully evented
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (18 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 19/37] solver: note the synchronous nature of index preparation Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 21/37] view: enforce trailing slash for /$INBOX/$OID/s/ endpoints Eric Wong
` (16 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Applying a 100+ patch series can be a pain and lead to a wayward
client monopolizing the connection. On the other hand, we'll
also need to be careful and limit the number of in-flight file
descriptors and parallel git-apply processes when we move to an
evented model, here.
---
lib/PublicInbox/SolverGit.pm | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 42bb603..1306534 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -358,6 +358,9 @@ sub apply_patches_cb ($$$$$) {
# recreate $oid_b
# Returns an array ref: [ ::Git object, oid_full, type, size, di ]
# or undef if nothing was found.
+#
+# TODO: complete the migration of this and ViewVCS into an evented
+# model for fairness
sub solve ($$$$) {
my ($self, $out, $oid_b, $hints) = @_;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 21/37] view: enforce trailing slash for /$INBOX/$OID/s/ endpoints
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (19 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 20/37] solver: add a TODO note about making this fully evented Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 22/37] solver: restore diagnostics and deal with CRLF Eric Wong
` (15 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
As with our use of the trailing slash in $MESSAGE_ID/T/ and
'$MESSAGE_ID/t/' endpoints, this is for 'wget -r --mirror'
compatibility as well as allowing sysadmins to quickly stand up
a static directory with "index.html" in it to reduce load.
---
lib/PublicInbox/ViewDiff.pm | 8 ++++----
lib/PublicInbox/ViewVCS.pm | 4 ++--
lib/PublicInbox/WWW.pm | 6 ++++--
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 94f015f..1aad283 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -45,12 +45,12 @@ sub diff_hunk ($$$$) {
my ($n) = ($ca =~ /^-(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
- my $rv = qq(@@ <a\nhref=$spfx$oid_a/s$dctx->{Q}$n>$ca</a>);
+ my $rv = qq(@@ <a\nhref=$spfx$oid_a/s/$dctx->{Q}$n>$ca</a>);
($n) = ($cb =~ /^\+(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
- $rv .= qq( <a\nhref=$spfx$oid_b/s$dctx->{Q}$n>$cb</a> @@);
+ $rv .= qq( <a\nhref=$spfx$oid_b/s/$dctx->{Q}$n>$cb</a> @@);
}
sub flush_diff ($$$$) {
@@ -90,11 +90,11 @@ sub flush_diff ($$$$) {
}
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
- $$dst .= qq($1<a\nhref=$spfx$2/s$dctx->{Q}>$2</a>);
+ $$dst .= qq($1<a\nhref=$spfx$2/s/$dctx->{Q}>$2</a>);
$$dst .= to_html($linkify, $s) ;
} elsif ($s =~ s/^index ($OID_NULL)(\.\.$OID_BLOB)\b//o) {
$$dst .= 'index ';
- $$dst .= qq(<a\nhref=$spfx$1/s$dctx->{Q}>$1</a>$2);
+ $$dst .= qq(<a\nhref=$spfx$1/s/$dctx->{Q}>$1</a>$2);
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
$dctx->{oid_a} = $1;
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 90c0907..61f4deb 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -27,7 +27,7 @@ my $enc_utf8 = find_encoding('UTF-8');
sub html_page ($$$) {
my ($ctx, $code, $strref) = @_;
- $ctx->{-upfx} = '../'; # from "/$INBOX/$OID/s"
+ $ctx->{-upfx} = '../../'; # from "/$INBOX/$OID/s/"
PublicInbox::WwwStream->response($ctx, $code, sub {
my ($nr, undef) = @_;
$nr == 1 ? $$strref : undef;
@@ -82,7 +82,7 @@ sub show ($$;$) {
}
my $path = to_filename($di->{path_b} || $hints->{path_b} || 'blob');
- my $raw_link = "(<a\nhref=_$path>raw</a>)";
+ my $raw_link = "(<a\nhref=$path>raw</a>)";
if ($binary) {
$log = "<pre>$oid $type $size bytes (binary)" .
" $raw_link</pre>" . $log;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index c73370f..a0fd7fa 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -118,10 +118,12 @@ sub call {
r301($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
get_text($ctx, $1, $2);
- } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
get_vcs_object($ctx, $1, $2);
- } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/_([\w\.\-]+)\z!o) {
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
get_vcs_object($ctx, $1, $2, $3);
+ } elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s\z!o) {
+ r301($ctx, $1, $2, 's/');
# convenience redirects order matters
} elsif ($path_info =~ m!$INBOX_RE/([^/]{2,})\z!o) {
r301($ctx, $1, $2);
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 22/37] solver: restore diagnostics and deal with CRLF
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (20 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 21/37] view: enforce trailing slash for /$INBOX/$OID/s/ endpoints Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 23/37] www: admin-configurable CSS via "publicinbox.css" Eric Wong
` (14 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Apparently Email::MIME returns quoted-printable text
with CRLF. So use --ignore-whitespace with git-apply(1)
and ensure we don't capture '\r' in pathnames from
those emails.
And restore "$@" dumping when we die while solving.
---
lib/PublicInbox/SolverGit.pm | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 1306534..8fde232 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -105,6 +105,11 @@ sub extract_diff ($$$$) {
my ($path_a, $path_b) = ($1, $2);
+ # diff header lines won't have \r because git
+ # will quote them, but Email::MIME gives CRLF
+ # for quoted-printable:
+ $path_b =~ tr/\r//d;
+
# don't care for leading 'a/' and 'b/'
my (undef, @a) = split(m{/}, git_unquote($path_a));
my (undef, @b) = split(m{/}, git_unquote($path_b));
@@ -248,8 +253,11 @@ sub do_apply_begin ($$$) {
defined(my $err_fd = fileno($out)) or die "fileno(out): $!";
my $rdr = { 0 => fileno($tmp), 1 => $err_fd, 2 => $err_fd };
+
+ # we need --ignore-whitespace because some patches are CRLF
my $cmd = [ qw(git -C), $wt_dir,
- qw(apply --cached --whitespace=warn --verbose) ];
+ qw(apply --cached --ignore-whitespace
+ --whitespace=warn --verbose) ];
spawn($cmd, undef, $rdr);
}
@@ -425,8 +433,12 @@ sub solve ($$$$) {
};
while (1) {
- my $ret = $cb->();
- return $ret if (ref($ret) || !defined($ret));
+ my $ret = eval { $cb->() };
+ unless (defined($ret)) {
+ print $out "E: $@\n" if $@;
+ return;
+ }
+ return $ret if ref($ret);
# $ret == ''; so continue looping here
}
}
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 23/37] www: admin-configurable CSS via "publicinbox.css"
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (21 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 22/37] solver: restore diagnostics and deal with CRLF Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 24/37] $INBOX/_/text/color/ and sample user-side CSS Eric Wong
` (13 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Maybe we'll default to a dark theme to promote energy savings...
See contrib/css/README for details
---
MANIFEST | 3 +
contrib/css/216dark.css | 26 ++++++++
contrib/css/216light.css | 25 +++++++
contrib/css/README | 41 ++++++++++++
examples/public-inbox.psgi | 2 +-
lib/PublicInbox/Config.pm | 3 +
lib/PublicInbox/Hval.pm | 14 ----
lib/PublicInbox/WWW.pm | 123 +++++++++++++++++++++++++++++++++++
lib/PublicInbox/WwwStream.pm | 2 +-
script/public-inbox-httpd | 2 +-
t/view.t | 2 +
11 files changed, 226 insertions(+), 17 deletions(-)
create mode 100644 contrib/css/216dark.css
create mode 100644 contrib/css/216light.css
create mode 100644 contrib/css/README
diff --git a/MANIFEST b/MANIFEST
index 5e980fe..1db7bd1 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -26,6 +26,9 @@ MANIFEST
Makefile.PL
README
TODO
+contrib/css/216dark.css
+contrib/css/216light.css
+contrib/css/README
contrib/selinux/el7/publicinbox.fc
contrib/selinux/el7/publicinbox.te
examples/README
diff --git a/contrib/css/216dark.css b/contrib/css/216dark.css
new file mode 100644
index 0000000..2fd85d0
--- /dev/null
+++ b/contrib/css/216dark.css
@@ -0,0 +1,26 @@
+/*
+ * Dark color scheme using 216 web-safe colors, inspired
+ * somewhat by the default color scheme in mutt.
+ * It reduces eyestrain for me, and energy usage for all:
+ * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
+ */
+* { background:#000; color:#ccc }
+
+/*
+ * Underlined links add visual noise which make them hard-to-read.
+ * Use colors to make them stand out, instead.
+ */
+a { color:#69f; text-decoration:none }
+a:visited { color:#96f }
+
+/* quoted text gets a different color */
+*.q { color:#09f }
+
+/*
+ * these may be used with cgit, too
+ * (cgit uses <div>, public-inbox uses <span>)
+ */
+*.add { color:#0ff }
+*.del { color:#f0f }
+*.head { color:#fff }
+*.hunk { color:#c93 }
diff --git a/contrib/css/216light.css b/contrib/css/216light.css
new file mode 100644
index 0000000..bf81bc5
--- /dev/null
+++ b/contrib/css/216light.css
@@ -0,0 +1,25 @@
+/*
+ * Light color scheme using 216 web-safe colors.
+ * Suitable for print, and blinding people with brightness.
+ * Haphazardly thrown together because bright colors hurt my eyes
+ */
+* { background:#fff; color:#333 }
+
+/*
+ * Underlined links add visual noise which make them hard-to-read.
+ * Use colors to make them stand out, instead.
+ */
+a { color:#00f; text-decoration:none }
+a:visited { color:#808 }
+
+/* quoted text gets a different color */
+*.q { color:#006 }
+
+/*
+ * these may be used with cgit, too
+ * (cgit uses <div>, public-inbox uses <span>)
+ */
+*.add { color:#060 }
+*.del {color:#900 }
+*.head { color:#000 }
+*.hunk { color:#960 }
diff --git a/contrib/css/README b/contrib/css/README
new file mode 100644
index 0000000..2473c2b
--- /dev/null
+++ b/contrib/css/README
@@ -0,0 +1,41 @@
+Example CSS for use with public-inbox.
+
+CSS::Minifier or CSS::Minifier::XS will be tried for minimizing
+CSS at startup if available(*).
+
+Multiple CSS files may be configured for user-selectability via
+the "title" attribute or for different media. Local CSS files
+are read into memory once at startup.
+
+If only one CSS file is given without "title", it will be inlined.
+
+Snippet from ~/.public-inbox/config, order matters to browsers.
+-----8<-----
+[publicinbox]
+ ; Depending on the browser, the first entry is the default.
+ ; So having "/dev/null" at the top means no colors by default.
+ ; Using the "title" attribute enables `View -> "Page Style"'
+ ; choices in Firefox.
+ css = /dev/null title=default
+
+ ; git-config supports backslash to continue long lines
+ ; Attributes ('media', 'title') must use single quotes(')
+ ; or no quotes at all, but not double-quotes, as git-config(1)
+ ; won't preserve them:
+ css = /path/to/public-inbox/contrib/css/216dark.css \
+ title=216dark \
+ media='screen,(prefers-color-scheme:dark)'
+
+ ; for tree haters who print web pages :P
+ css = /path/to/public-inbox/contrib/css/216light.css \
+ title=216light \
+ media='screen,print,(prefers-color-scheme:light)'
+
+ ; external CSS may be specified with href.
+ ; Using "//" (protocol-relative) URLs is allowed, as is
+ ; "https://" or "http://" for hosts which only support one protocol.
+ css = href=//example.com/fugly.css title=external
+
+
+(*) "libcss-minifier-perl" or "libcss-minifier-xs-perl"
+ on Debian-based systems
diff --git a/examples/public-inbox.psgi b/examples/public-inbox.psgi
index 4dd3306..8886d7f 100644
--- a/examples/public-inbox.psgi
+++ b/examples/public-inbox.psgi
@@ -8,9 +8,9 @@
use strict;
use warnings;
use PublicInbox::WWW;
-PublicInbox::WWW->preload;
use Plack::Builder;
my $www = PublicInbox::WWW->new;
+$www->preload;
# share the public-inbox code itself:
my $src = $ENV{SRC_GIT_DIR}; # '/path/to/public-inbox.git'
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 355e64b..cead7fc 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -49,6 +49,9 @@ sub new {
my $nod = join('|', @domains);
$self->{-no_obfuscate_re} = qr/(?:$nod)\z/i;
}
+ if (my $css = delete $self->{'publicinbox.css'}) {
+ $self->{css} = _array($css);
+ }
$self;
}
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index a120a29..0315d75 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -11,20 +11,6 @@ use PublicInbox::MID qw/mid_clean mid_escape/;
use base qw/Exporter/;
our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/;
-# User-generated content (UGC) may have excessively long lines
-# and screw up rendering on some browsers, so we use pre-wrap.
-#
-# We also force everything to the same scaled font-size because GUI
-# browsers (tested both Firefox and surf (webkit)) uses a larger font
-# for the Search <form> element than the rest of the page. Font size
-# uniformity is important to people who rely on gigantic fonts.
-# Finally, we use monospace to ensure the Search field and button
-# has the same size and spacing as everything else which is
-# <pre>-formatted anyways.
-use constant STYLE =>
- '<style>pre{white-space:pre-wrap}' .
- '*{font-size:100%;font-family:monospace}</style>';
-
my $enc_ascii = find_encoding('us-ascii');
sub new {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index a0fd7fa..863da85 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -6,6 +6,7 @@
# We focus on the lowest common denominators here:
# - targeted at text-only console browsers (w3m, links, etc..)
# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs
+# and diff/syntax-highlighting (optional)
# - No JavaScript, graphics or icons allowed.
# - Must not rely on static content
# - UTF-8 is only for user-content, 7-bit US-ASCII for us
@@ -118,6 +119,8 @@ sub call {
r301($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
get_text($ctx, $1, $2);
+ } elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
+ get_css($self, $2);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
get_vcs_object($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
@@ -135,6 +138,7 @@ sub call {
# for CoW-friendliness, MOOOOO!
sub preload {
+ my ($self) = @_;
require PublicInbox::Feed;
require PublicInbox::View;
require PublicInbox::SearchThread;
@@ -147,6 +151,9 @@ sub preload {
PublicInbox::NewsWWW)) {
eval "require $_;";
}
+ if (ref($self)) {
+ $self->stylesheets_prepare($_) for ('', '../', '../../');
+ }
}
# private functions below
@@ -464,4 +471,120 @@ sub get_attach {
PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn);
}
+# User-generated content (UGC) may have excessively long lines
+# and screw up rendering on some browsers, so we use pre-wrap.
+#
+# We also force everything to the same scaled font-size because GUI
+# browsers (tested both Firefox and surf (webkit)) uses a larger font
+# for the Search <form> element than the rest of the page. Font size
+# uniformity is important to people who rely on gigantic fonts.
+# Finally, we use monospace to ensure the Search field and button
+# has the same size and spacing as everything else which is
+# <pre>-formatted anyways.
+our $STYLE = 'pre{white-space:pre-wrap}*{font-size:100%;font-family:monospace}';
+
+sub stylesheets_prepare ($$) {
+ my ($self, $upfx) = @_;
+ my $mini = eval {
+ require CSS::Minifier;
+ sub { CSS::Minifier::minify(input => $_[0]) };
+ } || eval {
+ require CSS::Minifier::XS;
+ sub { CSS::Minifier::XS::minify($_[0]) };
+ } || sub { $_[0] };
+
+ my $css_map = {};
+ my $stylesheets = $self->{pi_config}->{css} || [];
+ my $links = [];
+ my $inline_ok = 1;
+
+ foreach my $s (@$stylesheets) {
+ my $attr = {};
+ local $_ = $s;
+ foreach my $k (qw(media title href)) {
+ if (s/\s*$k='([^']+)'// || s/\s*$k=(\S+)//) {
+ $attr->{$k} = $1;
+ }
+ }
+
+ if (defined $attr->{href}) {
+ $inline_ok = 0;
+ } else {
+ open(my $fh, '<', $_) or do {
+ warn "failed to open $_: $!\n";
+ next;
+ };
+ my ($key) = (m!([^/]+?)(?:\.css)?\z!i);
+ my $ctime = 0;
+ my $local = do { local $/; <$fh> };
+ if ($local =~ /\S/) {
+ $ctime = sprintf('%x',(stat($fh))[10]);
+ $local = $mini->($local);
+ }
+ $css_map->{$key} = $local;
+ $attr->{href} = "$upfx$key.css?$ctime";
+ if (defined($attr->{title})) {
+ $inline_ok = 0;
+ } elsif (($attr->{media}||'screen') eq 'screen') {
+ $attr->{-inline} = $local;
+ }
+ }
+ push @$links, $attr;
+ }
+
+ my $buf = "<style>$STYLE";
+ if ($inline_ok) {
+ my @ext; # for media=print and whatnot
+ foreach my $attr (@$links) {
+ if (defined(my $str = delete $attr->{-inline})) {
+ $buf .= $str;
+ } else {
+ push @ext, $attr;
+ }
+ }
+ $links = \@ext;
+ }
+ $buf .= '</style>';
+
+ if (@$links) {
+ foreach my $attr (@$links) {
+ delete $attr->{-inline};
+ $buf .= "<link\ntype=text/css\nrel=stylesheet";
+ while (my ($k, $v) = each %$attr) {
+ $v = qq{"$v"} if $v =~ /[\s=]/;
+ $buf .= qq{\n$k=$v};
+ }
+ $buf .= ' />';
+ }
+ $self->{"-style-$upfx"} = $buf;
+ } else {
+ $self->{-style_inline} = $buf;
+ }
+ $self->{-css_map} = $css_map;
+}
+
+# returns an HTML fragment with <style> or <link> tags in them
+# Called by WwwStream by nearly every HTML page
+sub style {
+ my ($self, $upfx) = @_;
+ $self->{-style_inline} || $self->{"-style-$upfx"} || do {
+ stylesheets_prepare($self, $upfx);
+ $self->{-style_inline} || $self->{"-style-$upfx"}
+ };
+}
+
+# /$INBOX/$KEY.css endpoint
+# CSS is configured globally for all inboxes, but we access them on
+# a per-inbox basis. This allows administrators to setup per-inbox
+# static routes to intercept the request before it hits PSGI
+sub get_css ($$) {
+ my ($self, $key) = @_;
+ my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
+ defined(my $css = $css_map->{$key}) or return r404();
+ my $h = [ 'Content-Length', bytes::length($css),
+ 'Content-Type', 'text/css' ];
+ PublicInbox::GitHTTPBackend::cache_one_year($h);
+ [ 200, $h, [ $css ] ];
+}
+
1;
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index e548f00..c3aeb6b 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -65,7 +65,7 @@ sub _html_top ($) {
"<html><head><title>$title</title>" .
"<link\nrel=alternate\ntitle=\"Atom feed\"\n".
"href=\"$atom\"\ntype=\"application/atom+xml\"/>" .
- PublicInbox::Hval::STYLE .
+ $ctx->{www}->style($upfx) .
"</head><body>". $top . $tip;
}
diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd
index 43f1818..47e38ec 100755
--- a/script/public-inbox-httpd
+++ b/script/public-inbox-httpd
@@ -21,8 +21,8 @@ my $refresh = sub {
}
} else {
require PublicInbox::WWW;
- PublicInbox::WWW->preload;
my $www = PublicInbox::WWW->new;
+ $www->preload;
$app = builder {
eval {
enable 'Deflater',
diff --git a/t/view.t b/t/view.t
index b829ecf..ef7d695 100644
--- a/t/view.t
+++ b/t/view.t
@@ -6,6 +6,7 @@ use Test::More;
use Email::MIME;
use Plack::Util;
use_ok 'PublicInbox::View';
+use_ok 'PublicInbox::Config';
# FIXME: make this test less fragile
my $ctx = {
@@ -18,6 +19,7 @@ my $ctx = {
nntp_url => sub {[]},
max_git_part => sub { undef },
description => sub { '' }),
+ www => Plack::Util::inline_object(style => sub { '' }),
};
$ctx->{-inbox}->{-primary_address} = 'test@example.com';
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 24/37] $INBOX/_/text/color/ and sample user-side CSS
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (22 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 23/37] www: admin-configurable CSS via "publicinbox.css" Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 25/37] viewdiff: support diff-highlighting w/o coderepo Eric Wong
` (12 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Since we now support more CSS classes for coloring,
give this feature more visibility.
---
Documentation/design_www.txt | 6 +--
MANIFEST | 1 +
Makefile.PL | 3 ++
TODO | 2 -
lib/PublicInbox/UserContent.pm | 78 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/WWW.pm | 17 ++++++--
lib/PublicInbox/WwwStream.pm | 2 +
lib/PublicInbox/WwwText.pm | 35 +++++++++++++++
8 files changed, 134 insertions(+), 10 deletions(-)
create mode 100644 lib/PublicInbox/UserContent.pm
diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index 514f8ff..c7d7fcb 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -107,8 +107,6 @@ browsers default to.
CSS classes (for user-supplied CSS)
-----------------------------------
-span.q - quoted text in email messages
-TODO: consider using highlight(1) via libhighlight-perl in Debian,
- optionally
-...
+See examples in contrib/css/ and lib/PublicInbox/WwwText.pm
+(or https://public-inbox.org/meta/_/text/color/ soon)
diff --git a/MANIFEST b/MANIFEST
index 1db7bd1..53d51b2 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -110,6 +110,7 @@ lib/PublicInbox/Spamcheck/Spamc.pm
lib/PublicInbox/Spawn.pm
lib/PublicInbox/SpawnPP.pm
lib/PublicInbox/Unsubscribe.pm
+lib/PublicInbox/UserContent.pm
lib/PublicInbox/V2Writable.pm
lib/PublicInbox/View.pm
lib/PublicInbox/ViewDiff.pm
diff --git a/Makefile.PL b/Makefile.PL
index c134ff9..e00c015 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -56,5 +56,8 @@ check-manifest :: MANIFEST
check:: pure_all check-manifest
\$(EATMYDATA) prove -lv -j\$(N)
+lib/PublicInbox/UserContent.pm :: contrib/css/216dark.css
+ @\$(PERL) -I lib \$@ \$<
+
EOF
}
diff --git a/TODO b/TODO
index 374d8df..57ea8c4 100644
--- a/TODO
+++ b/TODO
@@ -66,8 +66,6 @@ all need to be considered for everything we introduce)
* linkify thread skeletons better
https://public-inbox.org/git/6E3699DEA672430CAEA6DEFEDE6918F4@PhilipOakley/
-* generate sample CSS for use with userContent.css/dillo/etc
-
* streaming Email::MIME replacement: currently we generate many
allocations/strings for headers we never look at and slurp
entire message bodies into memory.
diff --git a/lib/PublicInbox/UserContent.pm b/lib/PublicInbox/UserContent.pm
new file mode 100644
index 0000000..b34ebf9
--- /dev/null
+++ b/lib/PublicInbox/UserContent.pm
@@ -0,0 +1,78 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Self-updating module containing a sample CSS for client-side
+# customization by users of public-inbox. Used by Makefile.PL
+package PublicInbox::UserContent;
+use strict;
+use warnings;
+
+# this sub is updated automatically:
+sub CSS () {
+ <<'_'
+ /*
+ * Dark color scheme using 216 web-safe colors, inspired
+ * somewhat by the default color scheme in mutt.
+ * It reduces eyestrain for me, and energy usage for all:
+ * https://en.wikipedia.org/wiki/Light-on-dark_color_scheme
+ */
+ * { background:#000; color:#ccc }
+
+ /*
+ * Underlined links add visual noise which make them hard-to-read.
+ * Use colors to make them stand out, instead.
+ */
+ a { color:#69f; text-decoration:none }
+ a:visited { color:#96f }
+
+ /* quoted text gets a different color */
+ *.q { color:#09f }
+
+ /*
+ * these may be used with cgit, too
+ * (cgit uses <div>, public-inbox uses <span>)
+ */
+ *.add { color:#0ff }
+ *.del { color:#f0f }
+ *.head { color:#fff }
+ *.hunk { color:#c93 }
+_
+}
+# end of auto-updated sub
+
+# return a sample CSS
+sub sample ($$) {
+ my ($ibx, $env) = @_;
+ my $url_prefix = $ibx->base_url($env);
+ my $preamble = <<"";
+/*
+ * Firefox users: this goes in \$PROFILE_FOLDER/chrome/userContent.css
+ * where \$PROFILE_FOLDER is platform-specific
+ *
+ * cf. http://kb.mozillazine.org/UserContent.css
+ * http://kb.mozillazine.org/Profile_folder_-_Firefox
+ *
+ * Users of dillo can remove the entire lines with "moz-only"
+ * in them and place the resulting file in ~/.dillo/style.css
+ */
+\@-moz-document url-prefix($url_prefix) { /* moz-only */
+
+ $preamble . CSS() . "\n} /* moz-only */\n";
+}
+
+# Auto-update this file based on the contents of a CSS file:
+# usage: perl -I lib __FILE__ contrib/css/216dark.css
+# (See Makefile.PL)
+if (scalar(@ARGV) == 1 && -r __FILE__) {
+ use autodie;
+ open my $ro, '<', $ARGV[0];
+ my $css = do { local $/; <$ro> };
+ $css =~ s/^([ \t]*\S)/\t$1/smg;
+ open my $rw, '+<', __FILE__;
+ my $out = do { local $/; <$rw> };
+ $out =~ s/^sub CSS.*^_\n\}/sub CSS () {\n\t<<'_'\n${css}_\n}/sm;
+ seek $rw, 0, 0;
+ print $rw $out;
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 863da85..406802a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -20,6 +20,7 @@ use URI::Escape qw(uri_unescape);
use PublicInbox::MID qw(mid_escape);
require PublicInbox::Git;
use PublicInbox::GitHTTPBackend;
+use PublicInbox::UserContent;
# TODO: consider a routing tree now that we have more endpoints:
our $INBOX_RE = qr!\A/([\w\-][\w\.\-]*)!;
@@ -120,7 +121,7 @@ sub call {
} elsif ($path_info =~ m!$INBOX_RE/_/text(?:/(.*))?\z!o) {
get_text($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/([\w\-\.]+)\.css\z!o) {
- get_css($self, $2);
+ get_css($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/\z!o) {
get_vcs_object($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/($OID_RE)/s/([\w\.\-]+)\z!o) {
@@ -577,10 +578,18 @@ sub style {
# CSS is configured globally for all inboxes, but we access them on
# a per-inbox basis. This allows administrators to setup per-inbox
# static routes to intercept the request before it hits PSGI
-sub get_css ($$) {
- my ($self, $key) = @_;
+sub get_css ($$$) {
+ my ($ctx, $inbox, $key) = @_;
+ my $r404 = invalid_inbox($ctx, $inbox);
+ return $r404 if $r404;
+ my $self = $ctx->{www};
my $css_map = $self->{-css_map} || stylesheets_prepare($self, '');
- defined(my $css = $css_map->{$key}) or return r404();
+ my $css = $css_map->{$key};
+ if (!defined($css) && $key eq 'userContent') {
+ my $env = $ctx->{env};
+ $css = PublicInbox::UserContent::sample($ctx->{-inbox}, $env);
+ }
+ defined $css or return r404();
my $h = [ 'Content-Length', bytes::length($css),
'Content-Type', 'text/css' ];
PublicInbox::GitHTTPBackend::cache_one_year($h);
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index c3aeb6b..8ae35c7 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -38,10 +38,12 @@ sub _html_top ($) {
my $title = $ctx->{-title_html} || $desc;
my $upfx = $ctx->{-upfx} || '';
my $help = $upfx.'_/text/help';
+ my $color = $upfx.'_/text/color';
my $atom = $ctx->{-atom} || $upfx.'new.atom';
my $tip = $ctx->{-html_tip} || '';
my $top = "<b>$desc</b>";
my $links = "<a\nhref=\"$help\">help</a> / ".
+ "<a\nhref=\"$color\">color</a> / ".
"<a\nhref=\"$atom\">Atom feed</a>";
if ($obj->search) {
my $q_val = $ctx->{-q_value_html};
diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm
index b5874cf..d3413ad 100644
--- a/lib/PublicInbox/WwwText.pm
+++ b/lib/PublicInbox/WwwText.pm
@@ -88,9 +88,44 @@ sub _srch_prefix ($$) {
1;
}
+sub _colors_help ($$) {
+ my ($ctx, $txt) = @_;
+ my $ibx = $ctx->{-inbox};
+ my $base_url = $ibx->base_url($ctx->{env});
+ $$txt .= "color customization for $base_url\n";
+ $$txt .= <<EOF;
+
+public-inbox provides a stable set of CSS classes for users to
+customize colors for highlighting diffs and code.
+
+Users of browsers such as dillo, Firefox, or some browser
+extensions may start by downloading the following sample CSS file
+to control the colors they see:
+
+ ${base_url}userContent.css
+
+CSS classes
+-----------
+
+ span.q - quoted text in email messages
+
+For diff highlighting, we try to match class names with those
+used by cgit: https://git.zx2c4.com/cgit/
+
+ span.add - diff post-image lines
+
+ span.del - diff pre-image lines
+
+ span.head - diff header (metainformation)
+
+ span.hunk - diff hunk-header
+
+EOF
+}
sub _default_text ($$$) {
my ($ctx, $key, $txt) = @_;
+ return _colors_help($ctx, $txt) if $key eq 'color';
return if $key ne 'help'; # TODO more keys?
my $ibx = $ctx->{-inbox};
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 25/37] viewdiff: support diff-highlighting w/o coderepo
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (23 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 24/37] $INBOX/_/text/color/ and sample user-side CSS Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 26/37] viewdiff: cleanup state transitions a bit Eric Wong
` (11 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Having diff highlighting alone is still useful, even
if blob-resolution/recreation is too expensive or
unfeasible.
---
lib/PublicInbox/View.pm | 18 ++++++++++--------
lib/PublicInbox/ViewDiff.pm | 13 +++++++++----
2 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 0187ec3..41a45b0 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -558,15 +558,17 @@ sub add_text_body {
return attach_link($upfx, $ct, $p, $fn) unless defined $s;
my ($diff, $spfx);
- if ($ibx->{-repo_objs} && $s =~ /^(?:diff|---|\+{3}) /ms) {
+ if ($s =~ /^(?:diff|---|\+{3}) /ms) {
$diff = [];
- my $n_slash = $upfx =~ tr!/!/!;
- if ($n_slash == 0) {
- $spfx = '../';
- } elsif ($n_slash == 1) {
- $spfx = '';
- } else { # nslash == 2
- $spfx = '../../';
+ if ($ibx->{-repo_objs}) {
+ my $n_slash = $upfx =~ tr!/!/!;
+ if ($n_slash == 0) {
+ $spfx = '../';
+ } elsif ($n_slash == 1) {
+ $spfx = '';
+ } else { # nslash == 2
+ $spfx = '../../';
+ }
}
};
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 1aad283..0d1aefb 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -40,7 +40,8 @@ sub diff_hunk ($$$$) {
my $oid_a = $dctx->{oid_a};
my $oid_b = $dctx->{oid_b};
- (defined($oid_a) && defined($oid_b)) or return "@@ $ca $cb @@";
+ (defined($spfx) && defined($oid_a) && defined($oid_b)) or
+ return "@@ $ca $cb @@";
my ($n) = ($ca =~ /^-(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
@@ -53,6 +54,11 @@ sub diff_hunk ($$$$) {
$rv .= qq( <a\nhref=$spfx$oid_b/s/$dctx->{Q}$n>$cb</a> @@);
}
+sub oid ($$$) {
+ my ($dctx, $spfx, $oid) = @_;
+ defined($spfx) ? qq(<a\nhref=$spfx$oid/s/$dctx->{Q}>$oid</a>) : $oid;
+}
+
sub flush_diff ($$$$) {
my ($dst, $spfx, $linkify, $diff) = @_;
my $state = DSTATE_INIT;
@@ -90,11 +96,10 @@ sub flush_diff ($$$$) {
}
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
- $$dst .= qq($1<a\nhref=$spfx$2/s/$dctx->{Q}>$2</a>);
+ $$dst .= $1 . oid($dctx, $spfx, $2);
$$dst .= to_html($linkify, $s) ;
} elsif ($s =~ s/^index ($OID_NULL)(\.\.$OID_BLOB)\b//o) {
- $$dst .= 'index ';
- $$dst .= qq(<a\nhref=$spfx$1/s/$dctx->{Q}>$1</a>$2);
+ $$dst .= 'index ' . oid($dctx, $spfx, $1) . $2;
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
$dctx->{oid_a} = $1;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 26/37] viewdiff: cleanup state transitions a bit
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (24 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 25/37] viewdiff: support diff-highlighting w/o coderepo Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 27/37] viewdiff: quote attributes for Atom feed Eric Wong
` (10 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
This makes things less error-prone and allows us to only
highlight the "@@ -\S+ \+\S+ @@" part of the hunk header
line, without highlighting the function context.
This more closely matches the coloring behavior of git-diff(1)
---
lib/PublicInbox/ViewDiff.pm | 84 ++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 44 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 0d1aefb..1fa1845 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -18,10 +18,18 @@ use PublicInbox::Git qw(git_unquote);
sub DSTATE_INIT () { 0 }
sub DSTATE_STAT () { 1 } # TODO
sub DSTATE_HEAD () { 2 } # /^diff --git /, /^index /, /^--- /, /^\+\+\+ /
-sub DSTATE_HUNK () { 3 } # /^@@ /
-sub DSTATE_CTX () { 4 } # /^ /
-sub DSTATE_ADD () { 5 } # /^\+/
-sub DSTATE_DEL () { 6 } # /^\-/
+sub DSTATE_CTX () { 3 } # /^ /
+sub DSTATE_ADD () { 4 } # /^\+/
+sub DSTATE_DEL () { 5 } # /^\-/
+my @state2class = (
+ '', # init
+ '', # stat
+ 'head',
+ '', # ctx
+ 'add',
+ 'del'
+);
+
sub UNSAFE () { "^A-Za-z0-9\-\._~/" }
my $OID_NULL = '0{7,40}';
@@ -59,6 +67,14 @@ sub oid ($$$) {
defined($spfx) ? qq(<a\nhref=$spfx$oid/s/$dctx->{Q}>$oid</a>) : $oid;
}
+sub to_state ($$$) {
+ my ($dst, $state, $new_state) = @_;
+ $$dst .= '</span>' if $state2class[$state];
+ $_[1] = $new_state;
+ my $class = $state2class[$new_state] or return;
+ $$dst .= "<span\nclass=$class>";
+}
+
sub flush_diff ($$$$) {
my ($dst, $spfx, $linkify, $diff) = @_;
my $state = DSTATE_INIT;
@@ -66,24 +82,18 @@ sub flush_diff ($$$$) {
foreach my $s (@$diff) {
if ($s =~ /^ /) {
- if ($state == DSTATE_HUNK || $state == DSTATE_ADD ||
- $state == DSTATE_DEL || $state == DSTATE_HEAD) {
- $$dst .= "</span><span\nclass=ctx>";
- $state = DSTATE_CTX;
+ if ($state2class[$state]) {
+ to_state($dst, $state, DSTATE_CTX);
}
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^-- $/) { # email signature begins
- if ($state != DSTATE_INIT) {
- $state = DSTATE_INIT;
- $$dst .= '</span>';
- }
+ $state == DSTATE_INIT or
+ to_state($dst, $state, DSTATE_INIT);
$$dst .= $s;
} elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!) {
if ($state != DSTATE_HEAD) {
my ($pa, $pb) = ($1, $2);
- $$dst .= '</span>' if $state != DSTATE_INIT;
- $$dst .= "<span\nclass=head>";
- $state = DSTATE_HEAD;
+ to_state($dst, $state, DSTATE_HEAD);
$pa = (split('/', git_unquote($pa), 2))[1];
$pb = (split('/', git_unquote($pb), 2))[1];
$dctx = {
@@ -106,38 +116,26 @@ sub flush_diff ($$$$) {
$dctx->{oid_b} = $2;
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
- my ($ca, $cb) = ($1, $2);
- if ($state == DSTATE_HEAD || $state == DSTATE_CTX ||
- $state == DSTATE_ADD || $state == DSTATE_DEL) {
- $$dst .= "</span><span\nclass=hunk>";
- $state = DSTATE_HUNK;
- $$dst .= diff_hunk($dctx, $spfx, $ca, $cb);
- } else {
- $$dst .= to_html($linkify, "@@ $ca $cb @@");
- }
+ $$dst .= '</span>' if $state2class[$state];
+ $$dst .= "<span\nclass=hunk>";
+ $$dst .= diff_hunk($dctx, $spfx, $1, $2);
+ $$dst .= '</span>';
+ $state = DSTATE_CTX;
$$dst .= to_html($linkify, $s);
- } elsif ($s =~ m!^--- $PATH_A!) {
- if ($state == DSTATE_INIT) { # color only (no oid link)
- $state = DSTATE_HEAD;
- $$dst .= "<span\nclass=head>";
- }
- $$dst .= to_html($linkify, $s);
- } elsif ($s =~ m!^\+{3} $PATH_B!) {
- if ($state == DSTATE_INIT) { # color only (no oid link)
- $state = DSTATE_HEAD;
- $$dst .= "<span\nclass=head>";
- }
+ } elsif ($s =~ m!^--- $PATH_A! ||
+ $s =~ m!^\+{3} $PATH_B!) {
+ # color only (no oid link)
+ $state == DSTATE_INIT and
+ to_state($dst, $state, DSTATE_HEAD);
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^\+/) {
if ($state != DSTATE_ADD && $state != DSTATE_INIT) {
- $$dst .= "</span><span\nclass=add>";
- $state = DSTATE_ADD;
+ to_state($dst, $state, DSTATE_ADD);
}
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^-/) {
if ($state != DSTATE_DEL && $state != DSTATE_INIT) {
- $$dst .= "</span><span\nclass=del>";
- $state = DSTATE_DEL;
+ to_state($dst, $state, DSTATE_DEL);
}
$$dst .= to_html($linkify, $s);
# ignore the following lines in headers:
@@ -148,15 +146,13 @@ sub flush_diff ($$$$) {
$s =~ /^(?:dis)?similarity index /) {
$$dst .= to_html($linkify, $s);
} else {
- if ($state != DSTATE_INIT) {
- $$dst .= '</span>';
- $state = DSTATE_INIT;
- }
+ $state == DSTATE_INIT or
+ to_state($dst, $state, DSTATE_INIT);
$$dst .= to_html($linkify, $s);
}
}
@$diff = ();
- $$dst .= '</span>' if $state != DSTATE_INIT;
+ $$dst .= '</span>' if $state2class[$state];
undef;
}
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 27/37] viewdiff: quote attributes for Atom feed
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (25 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 26/37] viewdiff: cleanup state transitions a bit Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 28/37] t/check-www-inbox: use xmlstarlet to validate Atom if available Eric Wong
` (9 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
We still need to use XHTML for the Atom feed, and XHTML requires
attributes to be quoted, whereas HTML 5 does not.
---
lib/PublicInbox/ViewDiff.pm | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 1fa1845..45d28e3 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -54,17 +54,17 @@ sub diff_hunk ($$$$) {
my ($n) = ($ca =~ /^-(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
- my $rv = qq(@@ <a\nhref=$spfx$oid_a/s/$dctx->{Q}$n>$ca</a>);
+ my $rv = qq(@@ <a\nhref="$spfx$oid_a/s/$dctx->{Q}$n">$ca</a>);
($n) = ($cb =~ /^\+(\d+)/);
$n = defined($n) ? do { ++$n; "#n$n" } : '';
- $rv .= qq( <a\nhref=$spfx$oid_b/s/$dctx->{Q}$n>$cb</a> @@);
+ $rv .= qq( <a\nhref="$spfx$oid_b/s/$dctx->{Q}$n">$cb</a> @@);
}
sub oid ($$$) {
my ($dctx, $spfx, $oid) = @_;
- defined($spfx) ? qq(<a\nhref=$spfx$oid/s/$dctx->{Q}>$oid</a>) : $oid;
+ defined($spfx) ? qq(<a\nhref="$spfx$oid/s/$dctx->{Q}">$oid</a>) : $oid;
}
sub to_state ($$$) {
@@ -72,7 +72,7 @@ sub to_state ($$$) {
$$dst .= '</span>' if $state2class[$state];
$_[1] = $new_state;
my $class = $state2class[$new_state] or return;
- $$dst .= "<span\nclass=$class>";
+ $$dst .= qq(<span\nclass="$class">);
}
sub flush_diff ($$$$) {
@@ -117,7 +117,7 @@ sub flush_diff ($$$$) {
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^@@ (\S+) (\S+) @@//) {
$$dst .= '</span>' if $state2class[$state];
- $$dst .= "<span\nclass=hunk>";
+ $$dst .= qq(<span\nclass="hunk">);
$$dst .= diff_hunk($dctx, $spfx, $1, $2);
$$dst .= '</span>';
$state = DSTATE_CTX;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 28/37] t/check-www-inbox: use xmlstarlet to validate Atom if available
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (26 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 27/37] viewdiff: quote attributes for Atom feed Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 29/37] viewdiff: do not link to 0{7,40} blobs (again) Eric Wong
` (8 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
I almost forgot about this script; but remembering to test
it against real-world data can be useful to hunt for bugs.
---
t/check-www-inbox.perl | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl
index 08e6247..7dd1eeb 100644
--- a/t/check-www-inbox.perl
+++ b/t/check-www-inbox.perl
@@ -1,5 +1,5 @@
#!/usr/bin/perl -w
-# Copyright (C) 2016-2018 all contributors <meta@public-inbox.org>
+# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Parallel WWW checker
my $usage = "$0 [-j JOBS] [-s SLOW_THRESHOLD] URL_OF_INBOX\n";
@@ -23,6 +23,16 @@ my %opts = (
GetOptions(%opts) or die "bad command-line args\n$usage";
my $root_url = shift or die $usage;
+chomp(my $xmlstarlet = `which xmlstarlet 2>/dev/null`);
+my $atom_check = eval {
+ require IPC::Run;
+ my $cmd = [ qw(xmlstarlet val -e -) ];
+ sub {
+ my ($in, $out, $err) = @_;
+ IPC::Run::run($cmd, $in, $out, $err);
+ }
+} if $xmlstarlet;
+
my %workers;
$SIG{TERM} = sub { exit 0 };
$SIG{CHLD} = sub {
@@ -146,7 +156,15 @@ sub worker_loop {
# make sure the HTML source doesn't screw up terminals
# when people curl the source (not remotely an expert
# on languages or encodings, here).
- next if $r->header('Content-Type') !~ m!\btext/html\b!;
+ my $ct = $r->header('Content-Type');
+ if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) {
+ my $raw = $r->decoded_content;
+ my ($out, $err) = ('', '');
+ $atom_check->(\$raw, \$out, \$err) and
+ warn "Atom ($?) - $u - <1:$out> <2:$err>\n";
+ }
+
+ next if $ct !~ m!\btext/html\b!;
my $dc = $r->decoded_content;
if ($dc =~ /([\x00-\x08\x0d-\x1f\x7f-\x{99999999}]+)/s) {
my $o = $1;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 29/37] viewdiff: do not link to 0{7,40} blobs (again)
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (27 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 28/37] t/check-www-inbox: use xmlstarlet to validate Atom if available Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 30/37] viewvcs: disable white-space prewrap in blob view Eric Wong
` (7 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
We must reset the diff context when starting a new file,
and we must correctly check for all-zeroes object_ids as
the post-image.
---
lib/PublicInbox/ViewDiff.pm | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index 45d28e3..a804568 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -107,9 +107,11 @@ sub flush_diff ($$$$) {
$$dst .= to_html($linkify, $s);
} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
$$dst .= $1 . oid($dctx, $spfx, $2);
+ $dctx = { Q => '' };
$$dst .= to_html($linkify, $s) ;
- } elsif ($s =~ s/^index ($OID_NULL)(\.\.$OID_BLOB)\b//o) {
+ } elsif ($s =~ s/^index ($OID_BLOB)(\.\.$OID_NULL)\b//o) {
$$dst .= 'index ' . oid($dctx, $spfx, $1) . $2;
+ $dctx = { Q => '' };
$$dst .= to_html($linkify, $s);
} elsif ($s =~ /^index ($OID_BLOB)\.\.($OID_BLOB)/o) {
$dctx->{oid_a} = $1;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 30/37] viewvcs: disable white-space prewrap in blob view
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (28 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 29/37] viewdiff: do not link to 0{7,40} blobs (again) Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 31/37] solver: force quoted-printable bodies to LF Eric Wong
` (6 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
We need to keep line-numbers from <a> tags synced to the actual
line numbers in the code when working in smaller viewports.
Maybe I only work on reasonable projects, but excessively
long lines seem to be less of a problem in code than they are
in emails.
---
lib/PublicInbox/ViewVCS.pm | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 61f4deb..4a3896d 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -100,7 +100,8 @@ sub show ($$;$) {
sprintf("<a id=n$_ href=#n$_>% ${pad}u</a>\n", $_)
} (1..$nl)) . '</pre></td>' .
'<td><pre> </pre></td>'. # pad for non-CSS users
- "<td\nclass=lines><pre><code>" . ascii_html($$blob) .
+ "<td\nclass=lines><pre\nstyle='white-space:pre'><code>" .
+ ascii_html($$blob) .
'</code></pre></td></tr></table>' . $log;
html_page($ctx, 200, \$log);
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 31/37] solver: force quoted-printable bodies to LF
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (29 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 30/37] viewvcs: disable white-space prewrap in blob view Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 32/37] solver: remove extra "^index $OID..$OID" line Eric Wong
` (5 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
..if the Email::MIME ->crlf is LF.
Email::MIME::Encodings forces everything to CRLF on
quoted-printable messages for RFC-compliance; and
git-apply --ignore-whitespace seems to miss a context
line which is just "\r\n" (w/o leading space).
---
lib/PublicInbox/SolverGit.pm | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 8fde232..612f495 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -78,6 +78,14 @@ sub extract_diff ($$$$) {
my ($s, undef) = msg_part_text($part, $ct);
defined $s or return;
my $di = {};
+
+ # Email::MIME::Encodings forces QP to be CRLF upon decoding,
+ # change it back to LF:
+ my $cte = $part->header('Content-Transfer-Encoding') || '';
+ if ($cte =~ /\bquoted-printable\b/i && $part->crlf eq "\n") {
+ $s =~ s/\r\n/\n/sg;
+ }
+
foreach my $l (split(/^/m, $s)) {
if ($l =~ $re) {
$di->{oid_a} = $1;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 32/37] solver: remove extra "^index $OID..$OID" line
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (30 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 31/37] solver: force quoted-printable bodies to LF Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 33/37] config: each_inbox iteration preserves config order Eric Wong
` (4 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
It was harmless, besides wasting space and memory.
---
lib/PublicInbox/SolverGit.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm
index 612f495..53a6262 100644
--- a/lib/PublicInbox/SolverGit.pm
+++ b/lib/PublicInbox/SolverGit.pm
@@ -103,7 +103,7 @@ sub extract_diff ($$$$) {
push @$hdr_lines, $l;
$di->{hdr_lines} = $hdr_lines;
- print $tmp @$hdr_lines, $l or die "print(tmp): $!";
+ print $tmp @$hdr_lines or die "print(tmp): $!";
# for debugging/diagnostics:
$di->{ibx} = $ibx;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 33/37] config: each_inbox iteration preserves config order
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (31 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 32/37] solver: remove extra "^index $OID..$OID" line Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 34/37] t/check-www-inbox: warn on missing Content-Type Eric Wong
` (3 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
For cross-inbox Message-ID resolution, having some sort of
stable ordering makes the most sense. Relying on the
order of the config file seems most natural and allows us
to avoid introducing yet another configuration knob.
---
lib/PublicInbox/Config.pm | 34 +++++++++++++++++++++++++---------
t/config.t | 19 +++++++++++++++++++
2 files changed, 44 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index cead7fc..ccfc114 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -90,13 +90,22 @@ sub lookup_name ($$) {
sub each_inbox {
my ($self, $cb) = @_;
- my %seen;
- foreach my $k (keys %$self) {
- $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
- next if $seen{$1};
- $seen{$1} = 1;
- my $ibx = lookup_name($self, $1) or next;
- $cb->($ibx);
+ if (my $section_order = $self->{-section_order}) {
+ foreach my $section (@$section_order) {
+ next if $section !~ m!\Apublicinbox\.([^/]+)\z!;
+ $self->{"publicinbox.$1.mainrepo"} or next;
+ my $ibx = lookup_name($self, $1) or next;
+ $cb->($ibx);
+ }
+ } else {
+ my %seen;
+ foreach my $k (keys %$self) {
+ $k =~ m!\Apublicinbox\.([^/]+)\.mainrepo\z! or next;
+ next if $seen{$1};
+ $seen{$1} = 1;
+ my $ibx = lookup_name($self, $1) or next;
+ $cb->($ibx);
+ }
}
}
@@ -137,7 +146,7 @@ sub default_file {
sub git_config_dump {
my ($file) = @_;
- my ($in, $out);
+ my (%section_seen, @section_order);
my @cmd = (qw/git config/, "--file=$file", '-l');
my $cmd = join(' ', @cmd);
my $fh = popen_rd(\@cmd) or die "popen_rd failed for $file: $!\n";
@@ -146,8 +155,14 @@ sub git_config_dump {
while (defined(my $line = <$fh>)) {
chomp $line;
my ($k, $v) = split(/=/, $line, 2);
- my $cur = $rv{$k};
+ my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/);
+ unless (defined $section_seen{$section}) {
+ $section_seen{$section} = 1;
+ push @section_order, $section;
+ }
+
+ my $cur = $rv{$k};
if (defined $cur) {
if (ref($cur) eq "ARRAY") {
push @$cur, $v;
@@ -159,6 +174,7 @@ sub git_config_dump {
}
}
close $fh or die "failed to close ($cmd) pipe: $?";
+ $rv{-section_order} = \@section_order;
\%rv;
}
diff --git a/t/config.t b/t/config.t
index 5f0a95b..7531fd7 100644
--- a/t/config.t
+++ b/t/config.t
@@ -150,4 +150,23 @@ for my $s (@valid) {
ok(PublicInbox::Config::valid_inbox_name($s), "$d name accepted");
}
+{
+ my $f = "$tmpdir/ordered";
+ open my $fh, '>', $f or die "open: $!";
+ my @expect;
+ foreach my $i (0..3) {
+ push @expect, "$i";
+ print $fh <<"" or die "print: $!";
+[publicinbox "$i"]
+ mainrepo = /path/to/$i.git
+ address = $i\@example.com
+
+ }
+ close $fh or die "close: $!";
+ my $cfg = PublicInbox::Config->new($f);
+ my @result;
+ $cfg->each_inbox(sub { push @result, $_[0]->{name} });
+ is_deeply(\@result, \@expect);
+}
+
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 34/37] t/check-www-inbox: warn on missing Content-Type
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (32 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 33/37] config: each_inbox iteration preserves config order Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 35/37] highlight: initial wrapper and PSGI service Eric Wong
` (2 subsequent siblings)
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Oops, I might've left it out, somewhere.
---
t/check-www-inbox.perl | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl
index 7dd1eeb..6232f16 100644
--- a/t/check-www-inbox.perl
+++ b/t/check-www-inbox.perl
@@ -156,7 +156,9 @@ sub worker_loop {
# make sure the HTML source doesn't screw up terminals
# when people curl the source (not remotely an expert
# on languages or encodings, here).
- my $ct = $r->header('Content-Type');
+ my $ct = $r->header('Content-Type') || '';
+ warn "no Content-Type: $u\n" if $ct eq '';
+
if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) {
my $raw = $r->decoded_content;
my ($out, $err) = ('', '');
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 35/37] highlight: initial wrapper and PSGI service
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (33 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 34/37] t/check-www-inbox: warn on missing Content-Type Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 36/37] hval: split out escape sequences to a separate table Eric Wong
2019-01-21 20:52 ` [PATCH 37/37] t/check-www-inbox: trap SIGINT for File::Temp destruction Eric Wong
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
I'll probably expose the PSGI service for cgit;
but it could be useful to others as well.
---
MANIFEST | 4 +
examples/highlight.psgi | 13 ++++
lib/PublicInbox/HlMod.pm | 126 ++++++++++++++++++++++++++++++++
lib/PublicInbox/WwwHighlight.pm | 73 ++++++++++++++++++
t/hl_mod.t | 54 ++++++++++++++
5 files changed, 270 insertions(+)
create mode 100644 examples/highlight.psgi
create mode 100644 lib/PublicInbox/HlMod.pm
create mode 100644 lib/PublicInbox/WwwHighlight.pm
create mode 100644 t/hl_mod.t
diff --git a/MANIFEST b/MANIFEST
index 53d51b2..e627206 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -38,6 +38,7 @@ examples/apache2_perl.conf
examples/apache2_perl_old.conf
examples/cgi-webrick.rb
examples/cgit-commit-filter.lua
+examples/highlight.psgi
examples/logrotate.conf
examples/public-inbox-config
examples/public-inbox-httpd.socket
@@ -74,6 +75,7 @@ lib/PublicInbox/GitHTTPBackend.pm
lib/PublicInbox/HTTP.pm
lib/PublicInbox/HTTPD.pm
lib/PublicInbox/HTTPD/Async.pm
+lib/PublicInbox/HlMod.pm
lib/PublicInbox/Hval.pm
lib/PublicInbox/Import.pm
lib/PublicInbox/Inbox.pm
@@ -120,6 +122,7 @@ lib/PublicInbox/WWW.pod
lib/PublicInbox/WatchMaildir.pm
lib/PublicInbox/WwwAtomStream.pm
lib/PublicInbox/WwwAttach.pm
+lib/PublicInbox/WwwHighlight.pm
lib/PublicInbox/WwwStream.pm
lib/PublicInbox/WwwText.pm
sa_config/Makefile
@@ -170,6 +173,7 @@ t/git-http-backend.psgi
t/git-http-backend.t
t/git.fast-import-data
t/git.t
+t/hl_mod.t
t/html_index.t
t/httpd-corner.psgi
t/httpd-corner.t
diff --git a/examples/highlight.psgi b/examples/highlight.psgi
new file mode 100644
index 0000000..244b128
--- /dev/null
+++ b/examples/highlight.psgi
@@ -0,0 +1,13 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Usage: plackup [OPTIONS] /path/to/this/file
+# A startup command for development which monitors changes:
+# plackup -I lib -o 127.0.0.1 -R lib -r examples/highlight.psgi
+use strict;
+use warnings;
+use PublicInbox::WwwHighlight;
+use Plack::Builder;
+my $hl = PublicInbox::WwwHighlight->new;
+builder { sub { $hl->call(@_) }; }
diff --git a/lib/PublicInbox/HlMod.pm b/lib/PublicInbox/HlMod.pm
new file mode 100644
index 0000000..5cbfb29
--- /dev/null
+++ b/lib/PublicInbox/HlMod.pm
@@ -0,0 +1,126 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# I have no idea how stable or safe this is for handling untrusted
+# input, but it seems to have been around for a while, and the
+# highlight(1) executable is supported by gitweb and cgit.
+#
+# I'm also unsure about API stability, but highlight 3.x seems to
+# have been around a few years and ikiwiki (apparently the only
+# user of the SWIG/Perl bindings, at least in Debian) hasn't needed
+# major changes to support it in recent years.
+#
+# Some code stolen from ikiwiki (GPL-2.0+)
+# wrapper for SWIG-generated highlight.pm bindings
+package PublicInbox::HlMod;
+use strict;
+use warnings;
+use highlight; # SWIG-generated stuff
+
+sub _parse_filetypes ($) {
+ my $ft_conf = $_[0]->searchFile('filetypes.conf') or
+ die 'filetypes.conf not found by highlight';
+ open my $fh, '<', $ft_conf or die "failed to open($ft_conf): $!";
+ local $/;
+ my $cfg = <$fh>;
+ my %ext2lang;
+ my @shebang; # order matters
+
+ # Hrm... why isn't this exposed by the highlight API?
+ # highlight >= 3.2 format (bind-style) (from ikiwiki)
+ while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+ Extensions\s*=\s*{([^}]+)}/sgx) {
+ my $lang = $1;
+ foreach my $bit (split(/,/, $2)) {
+ $bit =~ s/.*"(.*)".*/$1/s;
+ $ext2lang{$bit} = $lang;
+ }
+ }
+ # AFAIK, all the regexps used by in filetypes.conf distributed
+ # by highlight work as Perl REs
+ while ($cfg =~ /\bLang\s*=\s*\"([^"]+)\"[,\s]+
+ Shebang\s*=\s*\[\s*\[([^}]+)\s*\]\s*\]\s*}\s*,/sgx) {
+ my ($lang, $re) = ($1, $2);
+ eval {
+ my $perl_re = qr/$re/;
+ push @shebang, [ $lang, $perl_re ];
+ };
+ if ($@) {
+ warn "$lang shebang=[[$re]] did not work in Perl: $@";
+ }
+ }
+ (\%ext2lang, \@shebang);
+}
+
+sub new {
+ my ($class) = @_;
+ my $dir = highlight::DataDir->new;
+ $dir->initSearchDirectories('');
+ my ($ext2lang, $shebang) = _parse_filetypes($dir);
+ bless {
+ -dir => $dir,
+ -ext2lang => $ext2lang,
+ -shebang => $shebang,
+ }, $class;
+}
+
+sub _shebang2lang ($$) {
+ my ($self, $str) = @_;
+ my $shebang = $self->{-shebang};
+ foreach my $s (@$shebang) {
+ return $s->[0] if $$str =~ $s->[1];
+ }
+ undef;
+}
+
+sub _path2lang ($$) {
+ my ($self, $path) = @_;
+ my ($ext) = ($path =~ m!([^\\/\.]+)\z!);
+ $ext = lc($ext);
+ $self->{-ext2lang}->{$ext} || $ext;
+}
+
+sub do_hl {
+ my ($self, $str, $path) = @_;
+ my $lang = _path2lang($self, $path) if defined $path;
+ my $dir = $self->{-dir};
+ my $langpath;
+ if (defined $lang) {
+ $langpath = $dir->getLangPath("$lang.lang") or return;
+ $langpath = undef unless -f $langpath;
+ }
+ unless (defined $langpath) {
+ $lang = _shebang2lang($self, $str) or return;
+ $langpath = $dir->getLangPath("$lang.lang") or return;
+ $langpath = undef unless -f $langpath;
+ }
+ return unless defined $langpath;
+
+ my $gen = $self->{$langpath} ||= do {
+ my $g = highlight::CodeGenerator::getInstance($highlight::HTML);
+ $g->setFragmentCode(1); # generate html fragment
+ $g->setHTMLEnclosePreTag(1); # include <pre>
+
+ # whatever theme works
+ my $themepath = $dir->getThemePath('print.theme');
+ $g->initTheme($themepath);
+ $g->loadLanguage($langpath);
+ $g->setEncoding('utf-8');
+ $g;
+ };
+ \($gen->generateString($$str))
+}
+
+# SWIG instances aren't reference-counted, but $self is;
+# so we need to delete all the CodeGenerator instances manually
+# at our own destruction
+sub DESTROY {
+ my ($self) = @_;
+ foreach my $gen (values %$self) {
+ if (ref($gen) eq 'highlight::CodeGenerator') {
+ highlight::CodeGenerator::deleteInstance($gen);
+ }
+ }
+}
+
+1;
diff --git a/lib/PublicInbox/WwwHighlight.pm b/lib/PublicInbox/WwwHighlight.pm
new file mode 100644
index 0000000..3d6ca03
--- /dev/null
+++ b/lib/PublicInbox/WwwHighlight.pm
@@ -0,0 +1,73 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Standalone PSGI app to provide syntax highlighting as-a-service
+# via "highlight" Perl module ("libhighlight-perl" in Debian).
+#
+# This allows exposing highlight as a persistent HTTP service for
+# other scripts via HTTP PUT requests. PATH_INFO will be used
+# as a hint for detecting the language for highlight.
+#
+# The following example using curl(1) will do the right thing
+# regarding the file extension:
+#
+# curl -HExpect: -T /path/to/file http://example.com/
+#
+# You can also force a file extension by giving a path
+# (in this case, "c") via:
+#
+# curl -HExpect: -T /path/to/file http://example.com/x.c
+
+package PublicInbox::WwwHighlight;
+use strict;
+use warnings;
+use HTTP::Status qw(status_message);
+use parent qw(PublicInbox::HlMod);
+
+# TODO: support highlight(1) for distros which don't package the
+# SWIG extension. Also, there may be admins who don't want to
+# have ugly SWIG-generated code in a long-lived Perl process.
+
+sub r ($) {
+ my ($code) = @_;
+ my $msg = status_message($code);
+ my $len = length($msg);
+ [ $code, [qw(Content-Type text/plain Content-Length), $len], [$msg] ]
+}
+
+# another slurp API hogging up all my memory :<
+# This is capped by whatever the PSGI server allows,
+# $ENV{GIT_HTTP_MAX_REQUEST_BUFFER} for PublicInbox::HTTP (10 MB)
+sub read_in_full ($) {
+ my ($env) = @_;
+
+ my $in = $env->{'psgi.input'};
+ my $off = 0;
+ my $buf = '';
+ my $len = $env->{CONTENT_LENGTH} || 8192;
+ while (1) {
+ my $r = $in->read($buf, $len, $off);
+ last unless defined $r;
+ return \$buf if $r == 0;
+ $off += $r;
+ }
+ $env->{'psgi.errors'}->print("input read error: $!\n");
+}
+
+# entry point for PSGI
+sub call {
+ my ($self, $env) = @_;
+ my $req_method = $env->{REQUEST_METHOD};
+
+ return r(405) if $req_method ne 'PUT';
+
+ my $bref = read_in_full($env) or return r(500);
+ $bref = $self->do_hl($bref, $env->{PATH_INFO});
+
+ my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+ push @$h, 'Content-Length', bytes::length($$bref);
+
+ [ 200, $h, [ $$bref ] ]
+}
+
+1;
diff --git a/t/hl_mod.t b/t/hl_mod.t
new file mode 100644
index 0000000..b8b8eb9
--- /dev/null
+++ b/t/hl_mod.t
@@ -0,0 +1,54 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+eval { require highlight } or
+ plan skip_all => 'failed to load highlight.pm';
+use_ok 'PublicInbox::HlMod';
+my $hls = PublicInbox::HlMod->new;
+ok($hls, 'initialized OK');
+is($hls->_shebang2lang(\"#!/usr/bin/perl -w\n"), 'perl', 'perl shebang OK');
+is($hls->{-ext2lang}->{'pm'}, 'perl', '.pm suffix OK');
+is($hls->{-ext2lang}->{'pl'}, 'perl', '.pl suffix OK');
+is($hls->_path2lang('Makefile'), 'make', 'Makefile OK');
+my $str = do { local $/; open(my $fh, __FILE__); <$fh> };
+my $orig = $str;
+
+{
+ my $ref = $hls->do_hl(\$str, 'foo.perl');
+ is(ref($ref), 'SCALAR', 'got a scalar reference back');
+ like($$ref, qr/I can see you!/, 'we can see ourselves in output');
+
+ use PublicInbox::Spawn qw(which);
+ if (eval { require IPC::Run } && which('w3m')) {
+ require File::Temp;
+ my $cmd = [ qw(w3m -T text/html -dump -config /dev/null) ];
+ my ($out, $err) = ('', '');
+ IPC::Run::run($cmd, $ref, \$out, \$err);
+ # expand tabs and normalize whitespace,
+ # w3m doesn't preserve tabs
+ $orig =~ s/\t/ /gs;
+ $out =~ s/\s*\z//sg;
+ $orig =~ s/\s*\z//sg;
+ is($out, $orig, 'w3m output matches');
+ }
+}
+
+my $nr = $ENV{TEST_MEMLEAK};
+if ($nr && -r "/proc/$$/status") {
+ my $fh;
+ open $fh, '<', "/proc/$$/status";
+ diag "starting at memtest at ".join('', grep(/VmRSS:/, <$fh>));
+ PublicInbox::HlMod->new->do_hl(\$orig) for (1..$nr);
+ open $fh, '<', "/proc/$$/status";
+ diag "creating $nr instances: ".join('', grep(/VmRSS:/, <$fh>));
+ my $hls = PublicInbox::HlMod->new;
+ $hls->do_hl(\$orig) for (1..$nr);
+ $hls = undef;
+ open $fh, '<', "/proc/$$/status";
+ diag "reused instance $nr times: ".join('', grep(/VmRSS:/, <$fh>));
+}
+
+done_testing;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 36/37] hval: split out escape sequences to a separate table
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (34 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 35/37] highlight: initial wrapper and PSGI service Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
2019-01-21 20:52 ` [PATCH 37/37] t/check-www-inbox: trap SIGINT for File::Temp destruction Eric Wong
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
We'll want to handle those escape sequences independently,
since "highlight" already does HTML escaping.
---
lib/PublicInbox/Hval.pm | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 0315d75..4d70d5e 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -37,6 +37,21 @@ sub new_oneline {
$class->new($raw);
}
+# some of these overrides are standard C escapes so they're
+# easy-to-understand when rendered.
+my %escape_sequence = (
+ "\x00" => '\\0', # NUL
+ "\x07" => '\\a', # bell
+ "\x08" => '\\b', # backspace
+ "\x09" => "\t", # obvious to show as-is
+ "\x0a" => "\n", # obvious to show as-is
+ "\x0b" => '\\v', # vertical tab
+ "\x0c" => '\\f', # form feed
+ "\x0d" => '\\r', # carriage ret (not preceding \n)
+ "\x1b" => '^[', # ASCII escape (mutt seems to escape this way)
+ "\x7f" => '\\x7f', # DEL
+);
+
my %xhtml_map = (
'"' => '"',
'&' => '&',
@@ -46,18 +61,7 @@ my %xhtml_map = (
);
$xhtml_map{chr($_)} = sprintf('\\x%02x', $_) for (0..31);
-# some of these overrides are standard C escapes so they're
-# easy-to-understand when rendered.
-$xhtml_map{"\x00"} = '\\0'; # NUL
-$xhtml_map{"\x07"} = '\\a'; # bell
-$xhtml_map{"\x08"} = '\\b'; # backspace
-$xhtml_map{"\x09"} = "\t"; # obvious to show as-is
-$xhtml_map{"\x0a"} = "\n"; # obvious to show as-is
-$xhtml_map{"\x0b"} = '\\v'; # vertical tab
-$xhtml_map{"\x0c"} = '\\f'; # form feed
-$xhtml_map{"\x0d"} = '\\r'; # carriage ret (not preceding \n)
-$xhtml_map{"\x1b"} = '^['; # ASCII escape (mutt seems to escape this way)
-$xhtml_map{"\x7f"} = '\\x7f'; # DEL
+%xhtml_map = (%xhtml_map, %escape_sequence);
sub ascii_html {
my ($s) = @_;
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH 37/37] t/check-www-inbox: trap SIGINT for File::Temp destruction
2019-01-21 20:52 [PATCH 00/37] viewvcs: diff highlighting and more Eric Wong
` (35 preceding siblings ...)
2019-01-21 20:52 ` [PATCH 36/37] hval: split out escape sequences to a separate table Eric Wong
@ 2019-01-21 20:52 ` Eric Wong
36 siblings, 0 replies; 38+ messages in thread
From: Eric Wong @ 2019-01-21 20:52 UTC (permalink / raw)
To: meta
Otherwise, temporary GDBM files don't get unlinked
when I SIGINT the process.
---
t/check-www-inbox.perl | 2 ++
1 file changed, 2 insertions(+)
diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl
index 6232f16..1e88e95 100644
--- a/t/check-www-inbox.perl
+++ b/t/check-www-inbox.perl
@@ -14,6 +14,7 @@ use POSIX qw(:sys_wait_h);
use Time::HiRes qw(gettimeofday tv_interval);
use WWW::Mechanize;
use Data::Dumper;
+our $tmp_owner = $$;
my $nproc = 4;
my $slow = 0.5;
my %opts = (
@@ -34,6 +35,7 @@ my $atom_check = eval {
} if $xmlstarlet;
my %workers;
+$SIG{INT} = sub { exit 130 };
$SIG{TERM} = sub { exit 0 };
$SIG{CHLD} = sub {
while (1) {
--
EW
^ permalink raw reply related [flat|nested] 38+ messages in thread