From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E95691F677 for ; Tue, 9 Jan 2024 11:39:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1704800370; bh=D4bL/kt8x2xwuJN2gP50tsPVU0bDjeV5V31BlKjCuOE=; h=From:To:Subject:Date:In-Reply-To:References:From; b=2LAVaFFcsR7fDcjmLwNzmuGps3Ko2HYTtmve5L5SmXCuKbgUkufGR5stQ7V8uzB7f U4ncNWF5tF5DGbSGl/g6Pn7hPoSdnCGAWGWvAn5cPXnMoCNjv5w1yfCvXH7aq5nN62 oAxrDjhTD40OMQOgsOjPGIpp8+ExenhWtM+Qns94= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/6] www: linkify inbox addresses in To/Cc headers Date: Tue, 9 Jan 2024 11:39:27 +0000 Message-Id: <20240109113928.992464-6-e@80x24.org> In-Reply-To: <20240109113928.992464-1-e@80x24.org> References: <20240109113928.992464-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This makes it easier to discover contemporary messages crossposted to other groups within the same WWW instance. The internal cache is necessary for giant threads, and the expiry mechanism is necessary to prevent attackers from trivially OOM-ing. 
--- lib/PublicInbox/SearchView.pm | 2 +- lib/PublicInbox/View.pm | 70 +++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 8f851738..2d3e942c 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -322,7 +322,7 @@ EOM # link $INBOX_DIR/description text to "recent" view around # the newest message in this result set: - $ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs); + $ctx->{-t_max} = max(map { $_->{ts} } @$msgs); @$msgs = reverse @$msgs if $r; $ctx->{msgs} = $msgs; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 02b93d7b..39ec35c3 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -38,7 +38,7 @@ sub msg_page_i { : $ctx->gone('over'); $ctx->{mhref} = ($ctx->{nr} || $ctx->{smsg}) ? "../${\mid_href($smsg->{mid})}/" : ''; - if (_msg_page_prepare($eml, $ctx)) { + if (_msg_page_prepare($eml, $ctx, $smsg->{ts})) { $eml->each_part(\&add_text_body, $ctx, 1); print { $ctx->{zfh} } '
'; } @@ -183,6 +183,59 @@ sub nr_to_s ($$$) { $nr == 1 ? "$nr $singular" : "$nr $plural"; } +sub addr2urlmap ($) { + my ($ctx) = @_; + # cache makes a huge difference with /[tT] and large threads + my $key = PublicInbox::Git::host_prefix_url($ctx->{env}, ''); + my $ent = $ctx->{www}->{pi_cfg}->{-addr2urlmap}->{$key} // do { + my $by_addr = $ctx->{www}->{pi_cfg}->{-by_addr}; + my (%addr2url, $url); + while (my ($addr, $ibx) = each %$by_addr) { + $url = $ibx->base_url // $ibx->base_url($ctx->{env}); + $addr2url{$addr} = ascii_html($url) if defined $url; + } + # don't allow attackers to randomly change Host: headers + # and OOM us if the server handles all hostnames: + my $tmp = $ctx->{www}->{pi_cfg}->{-addr2urlmap}; + my @k = keys %$tmp; # random order + delete @$tmp{@k[0..3]} if scalar(@k) > 7; + my $re = join('|', map { quotemeta } keys %addr2url); + $tmp->{$key} = [ qr/\b($re)\b/i, \%addr2url ]; + }; + @$ent; +} + +sub to_cc_html ($$$$) { + my ($ctx, $eml, $field, $t) = @_; + my @vals = $eml->header($field) or return ('', 0); + my (undef, $addr2url) = addr2urlmap($ctx); + my $pairs = PublicInbox::Address::pairs(join(', ', @vals)); + my ($len, $line_len, $html) = (0, 0, ''); + my ($pair, $url); + my ($cur_ibx, $env) = @$ctx{qw(ibx env)}; + # avoid excessive ascii_html calls (already hot in profiles): + my @html = split /\n/, ascii_html(join("\n", map { + $_->[0] // (split(/\@/, $_->[1]))[0]; # addr user if no name + } @$pairs)); + for my $n (@html) { + $pair = shift @$pairs; + if ($line_len) { # 9 = display width of ",\t": + if ($line_len + length($n) > COLS - 9) { + $html .= ",\n\t"; + $len += $line_len; + $line_len = 0; + } else { + $html .= ', '; + $line_len += 2; + } + } + $line_len += length($n); + $url = $addr2url->{lc $pair->[1]}; + $html .= $url ? qq($n) : $n; + } + ($html, $len + $line_len); +} + # Displays the text of of the message for /$INBOX/$MSGID/[Tt]/ endpoint # this is already inside a
 sub eml_entry {
@@ -207,7 +260,8 @@ sub eml_entry {
 	my $ds = delete $smsg->{ds}; # for v1 non-Xapian/SQLite users
 
 	# Deleting these fields saves about 400K as we iterate across 1K msgs
-	delete @$smsg{qw(ts blob)};
+	my ($t, undef) = delete @$smsg{qw(ts blob)};
+	$t = $t ? '?t='.ts2str($t) : '';
 
 	my $from = _hdr_names_html($eml, 'From');
 	obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
@@ -216,9 +270,8 @@ sub eml_entry {
 	my $mhref = $upfx . mid_href($mid_raw) . '/';
 	$rv .= qq{ (permalink / };
 	$rv .= qq{raw)\n};
-	my $to = fold_addresses(_hdr_names_html($eml, 'To'));
-	my $cc = fold_addresses(_hdr_names_html($eml, 'Cc'));
-	my ($tlen, $clen) = (length($to), length($cc));
+	my ($to, $tlen) = to_cc_html($ctx, $eml, 'To', $t);
+	my ($cc, $clen) = to_cc_html($ctx, $eml, 'Cc', $t);
 	my $to_cc = '';
 	if (($tlen + $clen) > COLS) {
 		$to_cc .= '  To: '.$to."\n" if $tlen;
@@ -447,7 +500,7 @@ sub thread_html {
 
 	# link $INBOX_DIR/description text to "index_topics" view around
 	# the newest message in this thread
-	my $t = ts2str($ctx->{-t_max} = max(map { delete $_->{ts} } @$msgs));
+	my $t = ts2str($ctx->{-t_max} = max(map { $_->{ts} } @$msgs));
 	my $t_fmt = fmt_ts($ctx->{-t_max});
 
 	my $skel = '
';
@@ -613,7 +666,7 @@ sub add_text_body { # callback for each_part
 }
 
 sub _msg_page_prepare {
-	my ($eml, $ctx) = @_;
+	my ($eml, $ctx, $ts) = @_;
 	my $have_over = !!$ctx->{ibx}->over;
 	my $mids = mids_for_index($eml);
 	my $nr = $ctx->{nr}++;
@@ -649,6 +702,9 @@ href="d/">diff)
];
 	$title[0] = $subj[0] // '(no subject)';
 	$hbuf .= "Date: $_\n" for $eml->header('Date');
 	$hbuf = ascii_html($hbuf);
+	my $t = $ts ? '?t='.ts2str($ts) : '';
+	my ($re, $addr2url) = addr2urlmap($ctx);
+	$hbuf =~ s!$re!qq(<a\nhref=").$addr2url->{lc $1}.qq($t">$1</a>)!sge;
 	$ctx->{-title_html} = ascii_html(join(' - ', @title));
 	if (my $obfs_ibx = $ctx->{-obfs_ibx}) {
 		obfuscate_addrs($obfs_ibx, $hbuf);