* [PATCH 1/6] git: workaround occasional -watch error message
2024-01-09 11:39 [PATCH 0/6] WWW updates Eric Wong
@ 2024-01-09 11:39 ` Eric Wong
2024-01-09 11:39 ` [PATCH 2/6] doc: txt2pre: linkify -extindex(1), dedupe -config(5) Eric Wong
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2024-01-09 11:39 UTC (permalink / raw)
To: meta
I'm not sure how this happens (perl 5.34.1 on FreeBSD 13.2),
but it appears the {sock} check can succeed and {sock} can then
become undef, making it impossible to call ->owner_pid.
This happens when libgit2 is in use, so perhaps that's a factor.
In any case, the rest of the tests succeed.
---
lib/PublicInbox/Git.pm | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 6c4fcf93..7b991c6b 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -208,8 +208,17 @@ sub cat_async_retry ($$) {
sub gcf_inflight ($) {
my ($self) = @_;
+ # FIXME: the first {sock} check can succeed but Perl can complain
+ # about calling ->owner_pid on an undefined value. Not sure why or
+ # how this happens but t/imapd.t can complain about it, sometimes.
if ($self->{sock}) {
- return $self->{inflight} if $self->{sock}->owner_pid == $$;
+ if (eval { $self->{sock}->owner_pid == $$ }) {
+ return $self->{inflight};
+ } elsif ($@) {
+ no warnings 'uninitialized';
+ warn "E: $self sock=$self->{sock}: owner_pid failed: ".
+ "$@ (continuing...)";
+ }
delete @$self{qw(sock inflight)};
} else {
$self->close;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/6] doc: txt2pre: linkify -extindex(1), dedupe -config(5)
2024-01-09 11:39 [PATCH 0/6] WWW updates Eric Wong
2024-01-09 11:39 ` [PATCH 1/6] git: workaround occasional -watch error message Eric Wong
@ 2024-01-09 11:39 ` Eric Wong
2024-01-09 11:39 ` [PATCH 3/6] test_common: key2sub: don't require final ';' in scripts Eric Wong
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2024-01-09 11:39 UTC (permalink / raw)
To: meta
I noticed the HTML manpages didn't have -extindex linkification
while checking over the docs. While adding it, I also noticed
-config(5) had two entries :x
---
Documentation/txt2pre | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/txt2pre b/Documentation/txt2pre
index 89a77199..b45c52e8 100755
--- a/Documentation/txt2pre
+++ b/Documentation/txt2pre
@@ -53,10 +53,10 @@ for (qw[lei(1)
public-inbox-cindex(1)
public-inbox-clone(1)
public-inbox-config(5)
- public-inbox-config(5)
public-inbox-convert(1)
public-inbox-daemon(8)
public-inbox-edit(1)
+ public-inbox-extindex(1)
public-inbox-fetch(1)
public-inbox-glossary(7)
public-inbox-httpd(1)
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/6] test_common: key2sub: don't require final ';' in scripts
2024-01-09 11:39 [PATCH 0/6] WWW updates Eric Wong
2024-01-09 11:39 ` [PATCH 1/6] git: workaround occasional -watch error message Eric Wong
2024-01-09 11:39 ` [PATCH 2/6] doc: txt2pre: linkify -extindex(1), dedupe -config(5) Eric Wong
@ 2024-01-09 11:39 ` Eric Wong
2024-01-09 11:39 ` [PATCH 4/6] git: lowercase host in host_prefix_url Eric Wong
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2024-01-09 11:39 UTC (permalink / raw)
To: meta
I noticed this when I wrote a new (but probably unnecessary) *.t
test and `make check-run' failed since I omitted the final
semi-colon after `done_testing'.
---
lib/PublicInbox/TestCommon.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index d20bff28..25caaaa9 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -342,7 +342,7 @@ use subs qw(exit);
sub main {
# the below "line" directive is a magic comment, see perlsyn(1) manpage
# line 1 "$f"
-$str
+{ $str }
0;
}
1;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/6] git: lowercase host in host_prefix_url
2024-01-09 11:39 [PATCH 0/6] WWW updates Eric Wong
` (2 preceding siblings ...)
2024-01-09 11:39 ` [PATCH 3/6] test_common: key2sub: don't require final ';' in scripts Eric Wong
@ 2024-01-09 11:39 ` Eric Wong
2024-01-09 11:39 ` [PATCH 5/6] www: linkify inbox addresses in To/Cc headers Eric Wong
2024-01-09 11:39 ` [PATCH 6/6] www: use autodie in more coderepo places Eric Wong
5 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2024-01-09 11:39 UTC (permalink / raw)
To: meta
This will make it more effective for use as a cache key, since
hostnames are case-insensitive.
I'm not entirely happy with this sub being in the Git module
since it's used by lei and command-line tools, but that's
for another day to deal with...
---
lib/PublicInbox/Git.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 7b991c6b..f125b029 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -505,7 +505,7 @@ sub host_prefix_url ($$) {
my $host_port = $env->{HTTP_HOST} //
"$env->{SERVER_NAME}:$env->{SERVER_PORT}";
my $sn = $env->{SCRIPT_NAME} // '';
- "$env->{'psgi.url_scheme'}://$host_port$sn/$url";
+ "$env->{'psgi.url_scheme'}://\L$host_port\E$sn/$url";
}
sub base_url { # for coderepos, PSGI-only
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/6] www: linkify inbox addresses in To/Cc headers
2024-01-09 11:39 [PATCH 0/6] WWW updates Eric Wong
` (3 preceding siblings ...)
2024-01-09 11:39 ` [PATCH 4/6] git: lowercase host in host_prefix_url Eric Wong
@ 2024-01-09 11:39 ` Eric Wong
2024-01-09 12:49 ` [PATCH 7/6] address: avoid [ undef, undef ] address pairs Eric Wong
2024-01-09 11:39 ` [PATCH 6/6] www: use autodie in more coderepo places Eric Wong
5 siblings, 1 reply; 8+ messages in thread
From: Eric Wong @ 2024-01-09 11:39 UTC (permalink / raw)
To: meta
This makes it easier to discover contemporary messages
crossposted to other groups within the same WWW instance.
The internal cache is necessary for giant threads, and the
expiry mechanism is necessary to prevent attackers from
trivially OOM-ing the server.
---
lib/PublicInbox/SearchView.pm | 2 +-
lib/PublicInbox/View.pm | 70 +++++++++++++++++++++++++++++++----
2 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 8f851738..2d3e942c 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -322,7 +322,7 @@ EOM
# link $INBOX_DIR/description text to "recent" view around
# the newest message in this result set:
- $ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs);
+ $ctx->{-t_max} = max(map { $_->{ts} } @$msgs);
@$msgs = reverse @$msgs if $r;
$ctx->{msgs} = $msgs;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 02b93d7b..39ec35c3 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -38,7 +38,7 @@ sub msg_page_i {
: $ctx->gone('over');
$ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ?
"../${\mid_href($smsg->{mid})}/" : '';
- if (_msg_page_prepare($eml, $ctx)) {
+ if (_msg_page_prepare($eml, $ctx, $smsg->{ts})) {
$eml->each_part(\&add_text_body, $ctx, 1);
print { $ctx->{zfh} } '</pre><hr>';
}
@@ -183,6 +183,59 @@ sub nr_to_s ($$$) {
$nr == 1 ? "$nr $singular" : "$nr $plural";
}
+sub addr2urlmap ($) {
+ my ($ctx) = @_;
+ # cache makes a huge difference with /[tT] and large threads
+ my $key = PublicInbox::Git::host_prefix_url($ctx->{env}, '');
+ my $ent = $ctx->{www}->{pi_cfg}->{-addr2urlmap}->{$key} // do {
+ my $by_addr = $ctx->{www}->{pi_cfg}->{-by_addr};
+ my (%addr2url, $url);
+ while (my ($addr, $ibx) = each %$by_addr) {
+ $url = $ibx->base_url // $ibx->base_url($ctx->{env});
+ $addr2url{$addr} = ascii_html($url) if defined $url;
+ }
+ # don't allow attackers to randomly change Host: headers
+ # and OOM us if the server handles all hostnames:
+ my $tmp = $ctx->{www}->{pi_cfg}->{-addr2urlmap};
+ my @k = keys %$tmp; # random order
+ delete @$tmp{@k[0..3]} if scalar(@k) > 7;
+ my $re = join('|', map { quotemeta } keys %addr2url);
+ $tmp->{$key} = [ qr/\b($re)\b/i, \%addr2url ];
+ };
+ @$ent;
+}
+
+sub to_cc_html ($$$$) {
+ my ($ctx, $eml, $field, $t) = @_;
+ my @vals = $eml->header($field) or return ('', 0);
+ my (undef, $addr2url) = addr2urlmap($ctx);
+ my $pairs = PublicInbox::Address::pairs(join(', ', @vals));
+ my ($len, $line_len, $html) = (0, 0, '');
+ my ($pair, $url);
+ my ($cur_ibx, $env) = @$ctx{qw(ibx env)};
+ # avoid excessive ascii_html calls (already hot in profiles):
+ my @html = split /\n/, ascii_html(join("\n", map {
+ $_->[0] // (split(/\@/, $_->[1]))[0]; # addr user if no name
+ } @$pairs));
+ for my $n (@html) {
+ $pair = shift @$pairs;
+ if ($line_len) { # 9 = display width of ",\t":
+ if ($line_len + length($n) > COLS - 9) {
+ $html .= ",\n\t";
+ $len += $line_len;
+ $line_len = 0;
+ } else {
+ $html .= ', ';
+ $line_len += 2;
+ }
+ }
+ $line_len += length($n);
+ $url = $addr2url->{lc $pair->[1]};
+ $html .= $url ? qq(<a\nhref="$url$t">$n</a>) : $n;
+ }
+ ($html, $len + $line_len);
+}
+
# Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint
# this is already inside a <pre>
sub eml_entry {
@@ -207,7 +260,8 @@ sub eml_entry {
my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users
# Deleting these fields saves about 400K as we iterate across 1K msgs
- delete @$smsg{qw(ts blob)};
+ my ($t, undef) = delete @$smsg{qw(ts blob)};
+ $t = $t ? '?t='.ts2str($t) : '';
my $from = _hdr_names_html($eml, 'From');
obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
@@ -216,9 +270,8 @@ sub eml_entry {
my $mhref = $upfx . mid_href($mid_raw) . '/';
$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
$rv .= qq{<a\nhref="${mhref}raw">raw</a>)\n};
- my $to = fold_addresses(_hdr_names_html($eml, 'To'));
- my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
- my ($tlen, $clen) = (length($to), length($cc));
+ my ($to, $tlen) = to_cc_html($ctx, $eml, 'To', $t);
+ my ($cc, $clen) = to_cc_html($ctx, $eml, 'Cc', $t);
my $to_cc = '';
if (($tlen + $clen) > COLS) {
$to_cc .= ' To: '.$to."\n" if $tlen;
@@ -447,7 +500,7 @@ sub thread_html {
# link $INBOX_DIR/description text to "index_topics" view around
# the newest message in this thread
- my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs));
+ my $t = ts2str($ctx->{-t_max} = max(map { $_->{ts} } @$msgs));
my $t_fmt = fmt_ts($ctx->{-t_max});
my $skel = '<hr><pre>';
@@ -613,7 +666,7 @@ sub add_text_body { # callback for each_part
}
sub _msg_page_prepare {
- my ($eml, $ctx) = @_;
+ my ($eml, $ctx, $ts) = @_;
my $have_over = !!$ctx->{ibx}->over;
my $mids = mids_for_index($eml);
my $nr = $ctx->{nr}++;
@@ -649,6 +702,9 @@ href="d/">diff</a>)</pre><pre>];
$title[0] = $subj[0] // '(no subject)';
$hbuf .= "Date: $_\n" for $eml->header('Date');
$hbuf = ascii_html($hbuf);
+ my $t = $ts ? '?t='.ts2str($ts) : '';
+ my ($re, $addr2url) = addr2urlmap($ctx);
+ $hbuf =~ s!$re!qq(<a\nhref=").$addr2url->{lc $1}.qq($t">$1</a>)!sge;
$ctx->{-title_html} = ascii_html(join(' - ', @title));
if (my $obfs_ibx = $ctx->{-obfs_ibx}) {
obfuscate_addrs($obfs_ibx, $hbuf);
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 7/6] address: avoid [ undef, undef ] address pairs
2024-01-09 11:39 ` [PATCH 5/6] www: linkify inbox addresses in To/Cc headers Eric Wong
@ 2024-01-09 12:49 ` Eric Wong
0 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2024-01-09 12:49 UTC (permalink / raw)
To: meta
For totally bogus things in address fields, we'll fall back to
showing the original entry in the name column when using
Email::Address::XS.
The pure Perl version differs here, but we'll just let them be
different when it comes to handling bogus data.
---
This fixes some warning spew I noticed in syslog while letting
crawlers run on https://yhbt.net/lore/
lib/PublicInbox/Address.pm | 7 +++++--
lib/PublicInbox/View.pm | 2 +-
t/address.t | 4 ++++
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm
index a5902cfd..3a59945c 100644
--- a/lib/PublicInbox/Address.pm
+++ b/lib/PublicInbox/Address.pm
@@ -19,8 +19,11 @@ sub xs_names {
}
sub xs_pairs { # for JMAP, RFC 8621 section 4.1.2.3
- [ map { # LHS (name) may be undef
- [ $_->phrase // $_->comment, $_->address ]
+ [ map { # LHS (name) may be undef if there's an address
+ my @p = ($_->phrase // $_->comment, $_->address);
+ # show original if totally bogus:
+ $p[0] = $_->original unless defined $p[1];
+ \@p;
} parse_email_addresses($_[0]) ];
}
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 39ec35c3..9d4262c1 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -230,7 +230,7 @@ sub to_cc_html ($$$$) {
}
}
$line_len += length($n);
- $url = $addr2url->{lc $pair->[1]};
+ $url = $addr2url->{lc($pair->[1] // '')};
$html .= $url ? qq(<a\nhref="$url$t">$n</a>) : $n;
}
($html, $len + $line_len);
diff --git a/t/address.t b/t/address.t
index 16000d2d..86f47395 100644
--- a/t/address.t
+++ b/t/address.t
@@ -77,6 +77,10 @@ sub test_pkg {
is_deeply([], \@emails , 'no address for local address');
@names = $emails->('Local User <user>');
is_deeply([], \@names, 'no address, no name');
+
+ my $p = $pairs->('NAME, a@example, wtf@');
+ is scalar(grep { defined($_->[0] // $_->[1]) } @$p),
+ scalar(@$p), 'something is always defined in bogus pairs';
}
test_pkg('PublicInbox::Address');
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 6/6] www: use autodie in more coderepo places
2024-01-09 11:39 [PATCH 0/6] WWW updates Eric Wong
` (4 preceding siblings ...)
2024-01-09 11:39 ` [PATCH 5/6] www: linkify inbox addresses in To/Cc headers Eric Wong
@ 2024-01-09 11:39 ` Eric Wong
5 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2024-01-09 11:39 UTC (permalink / raw)
To: meta
This cuts down on code somewhat (before I add more :x)
---
lib/PublicInbox/ViewVCS.pm | 11 ++++-------
lib/PublicInbox/WwwCoderepo.pm | 5 +++--
2 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index be062f36..3d835289 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -28,6 +28,7 @@ use PublicInbox::Eml;
use Text::Wrap qw(wrap);
use PublicInbox::Hval qw(ascii_html to_filename prurl utf8_maybe);
use POSIX qw(strftime);
+use autodie qw(open);
my $hl = eval {
require PublicInbox::HlMod;
PublicInbox::HlMod->new;
@@ -154,8 +155,7 @@ sub show_commit_start { # ->psgi_qx callback
}
my $patchid = (split(/ /, $$bref))[0]; # ignore commit
$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
- open my $fh, '<', "$ctx->{-tmp}/h" or
- die "open $ctx->{-tmp}/h: $!";
+ open my $fh, '<', "$ctx->{-tmp}/h";
chop(my $buf = do { local $/ = "\0"; <$fh> });
utf8_maybe($buf); # non-UTF-8 commits exist
chomp $buf;
@@ -244,7 +244,7 @@ committer $co
EOM
print $zfh "\n", $ctx->{-linkify}->to_html($bdy) if length($bdy);
$bdy = '';
- open my $fh, '<', "$ctx->{-tmp}/p" or die "open $ctx->{-tmp}/p: $!";
+ open my $fh, '<', "$ctx->{-tmp}/p";
if (-s $fh > $MAX_SIZE) {
print $zfh "---\n patch is too large to show\n";
} else { # prepare flush_diff:
@@ -599,10 +599,7 @@ sub show ($$;$) {
}
$ctx->{fn} = $fn;
$ctx->{-tmp} = File::Temp->newdir("solver.$oid_b-XXXX", TMPDIR => 1);
- unless ($ctx->{lh}) {
- open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log" or
- die "open: $!";
- }
+ $ctx->{lh} or open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log";
my $solver = PublicInbox::SolverGit->new($ctx->{ibx},
\&solve_result, $ctx);
$solver->{gits} //= [ $ctx->{git} ];
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 3814f719..4ab9a77c 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -23,6 +23,7 @@ use PublicInbox::RepoList;
use PublicInbox::OnDestroy;
use URI::Escape qw(uri_escape_utf8);
use File::Spec;
+use autodie qw(fcntl open);
my @EACH_REF = (qw(git for-each-ref --sort=-creatordate),
"--format=%(HEAD)%00".join('%00', map { "%($_)" }
@@ -81,11 +82,11 @@ sub new {
$self->{$_} = 10 for qw(summary_log);
# try reuse STDIN if it's already /dev/null
- open $self->{log_fh}, '+>', '/dev/null' or die "open: $!";
+ open $self->{log_fh}, '+>', '/dev/null';
my @l = stat($self->{log_fh}) or die "stat: $!";
my @s = stat(STDIN) or die "stat(STDIN): $!";
if ("@l[0, 1]" eq "@s[0, 1]") {
- my $f = fcntl(STDIN, F_GETFL, 0) // die "F_GETFL: $!";
+ my $f = fcntl(STDIN, F_GETFL, 0);
$self->{log_fh} = *STDIN{IO} if $f & O_RDWR;
}
$self;
^ permalink raw reply related [flat|nested] 8+ messages in thread