unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] linkify: avoid digits and dashes in placeholders
Date: Sun, 28 Aug 2022 03:59:50 +0000	[thread overview]
Message-ID: <20220828035950.10620-1-e@80x24.org> (raw)

The `highlight' module seems to highlight every digit in
YAML (and possibly other) source files.  This causes problems
in linkify_2, which replaces the placeholders with proper URIs,
since a placeholder broken up by highlighting no longer matches
the pattern linkify_2 looks for.
I suspect `-' and other punctuation characters will cause
similar problems, so we must stick to [A-Za-z].

Thus transliterate 0-9 to A-J in the hex key to ensure highlight
doesn't see digit characters, and rename the prefix from
`PI-LINK-' to `LINKIFY' so it is dash-free and project-name
independent.
---
 lib/PublicInbox/Linkify.pm | 27 +++++++++++++--------------
 t/linkify.t                |  5 +++++
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 2ac74e2a..9fc3128f 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # two-step linkification.
@@ -11,7 +11,7 @@
 # Maybe this could be done more efficiently...
 package PublicInbox::Linkify;
 use strict;
-use warnings;
+use v5.10.1;
 use Digest::SHA qw/sha1_hex/;
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MID qw($MID_EXTRACT);
@@ -68,23 +68,22 @@ sub linkify_1 {
 		# salt this, as this could be exploited to show
 		# links in the HTML which don't show up in the raw mail.
 		my $key = sha1_hex($url . $SALT);
-
+		$key =~ tr/0-9/A-J/; # no digits for YAML highlight
 		$_[0]->{$key} = $url;
-		$beg . 'PI-LINK-'. $key . $end;
+		$beg . 'LINKIFY' . $key . $end;
 	^geo;
 	$_[1];
 }
 
 sub linkify_2 {
-	# Added "PI-LINK-" prefix to avoid false-positives on git commits
-	$_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+	# Added "LINKIFY" prefix to avoid false-positives on git commits
+	$_[1] =~ s!\bLINKIFY([a-fA-J]{40})\b!
 		my $key = $1;
 		my $url = $_[0]->{$key};
 		if (defined $url) {
 			"<a\nhref=\"$url\">$url</a>";
-		} else {
-			# false positive or somebody tried to mess with us
-			$key;
+		} else { # false positive or somebody tried to mess with us
+			'LINKIFY'.$key;
 		}
 	!ge;
 	$_[1];
@@ -102,20 +101,20 @@ sub linkify_mids {
 		# salt this, as this could be exploited to show
 		# links in the HTML which don't show up in the raw mail.
 		my $key = sha1_hex($html . $SALT);
+		$key =~ tr/0-9/A-J/;
 		my $repl = qq(&lt;<a\nhref="$pfx/$href/">$html</a>&gt;);
 		$repl .= qq{ (<a\nhref="$pfx/$href/raw">raw</a>)} if $raw;
 		$self->{$key} = $repl;
-		'PI-LINK-'. $key;
+		'LINKIFY'.$key;
 		!ge;
 	$$str = ascii_html($$str);
-	$$str =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+	$$str =~ s!\bLINKIFY([a-fA-J]{40})\b!
 		my $key = $1;
 		my $repl = $_[0]->{$key};
 		if (defined $repl) {
 			$repl;
-		} else {
-			# false positive or somebody tried to mess with us
-			$key;
+		} else { # false positive or somebody tried to mess with us
+			'LINKIFY'.$key;
 		}
 	!ge;
 }
diff --git a/t/linkify.t b/t/linkify.t
index e42e1efe..9280fd91 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -144,4 +144,9 @@ href="http://www.$hc.example.com/">http://www.$hc.example.com/</a>};
 	is($s, $expect, 'IDN message escaped properly');
 }
 
+{
+	my $false_positive = 'LINKIFY'.('A' x 40);
+	is(PublicInbox::Linkify->new->to_html($false_positive),
+		$false_positive, 'false-positive left as-is');
+}
 done_testing();

                 reply	other threads:[~2022-08-28  3:59 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220828035950.10620-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).