From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DC3051F54E for ; Sun, 28 Aug 2022 03:59:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1661659190; bh=CRlGGmmy5b9yA+RUtIjgTdqLjuK4ntaDN12BYbOmnyE=; h=From:To:Subject:Date:From; b=F2o0hWU3dlguU5ogtyTcFwcp628X7b++/+FFHnaD+5bD/FEppDY2PYzFpvVOohR0Y nA/JrdwPhKjGFCo/KSnDb0Yrk9iKVvMuK1CazUbzsp43RcBzcmKgIno7S1HBjlWmhr y2tCEVu8DMXOsqTC9GeY1E/P53jtxhaAJaB8NCkg= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] linkify: avoid digits and dashes in placeholders Date: Sun, 28 Aug 2022 03:59:50 +0000 Message-Id: <20220828035950.10620-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The `highlight' module seems to highlight every digit in YAML (and possibly other) source files. This causes problems in linkify_2 which replaces the placeholders with proper URIs. I suspect `-' and other punctuation characters will cause similar problems, so we must stick to [A-Za-z]. Thus transliterate 0-9 to A-J in the hex key to ensure highlight doesn't see digit characters, and rename the prefix to be project-name independent. --- lib/PublicInbox/Linkify.pm | 27 +++++++++++++-------------- t/linkify.t | 5 +++++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 2ac74e2a..9fc3128f 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2014-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # two-step linkification. @@ -11,7 +11,7 @@ # Maybe this could be done more efficiently... package PublicInbox::Linkify; use strict; -use warnings; +use v5.10.1; use Digest::SHA qw/sha1_hex/; use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MID qw($MID_EXTRACT); @@ -68,23 +68,22 @@ sub linkify_1 { # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - + $key =~ tr/0-9/A-J/; # no digits for YAML highlight $_[0]->{$key} = $url; - $beg . 'PI-LINK-'. $key . $end; + $beg . 'LINKIFY' . $key . $end; ^geo; $_[1]; } sub linkify_2 { - # Added "PI-LINK-" prefix to avoid false-positives on git commits - $_[1] =~ s!\bPI-LINK-([a-f0-9]{40})\b! + # Added "LINKIFY" prefix to avoid false-positives on git commits + $_[1] =~ s!\bLINKIFY([a-fA-J]{40})\b! my $key = $1; my $url = $_[0]->{$key}; if (defined $url) { "$url"; - } else { - # false positive or somebody tried to mess with us - $key; + } else { # false positive or somebody tried to mess with us + 'LINKIFY'.$key; } !ge; $_[1]; @@ -102,20 +101,20 @@ sub linkify_mids { # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($html . $SALT); + $key =~ tr/0-9/A-J/; my $repl = qq(<$html>); $repl .= qq{ (raw)} if $raw; $self->{$key} = $repl; - 'PI-LINK-'. $key; + 'LINKIFY'.$key; !ge; $$str = ascii_html($$str); - $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b! + $$str =~ s!\bLINKIFY([a-fA-J]{40})\b! my $key = $1; my $repl = $_[0]->{$key}; if (defined $repl) { $repl; - } else { - # false positive or somebody tried to mess with us - $key; + } else { # false positive or somebody tried to mess with us + 'LINKIFY'.$key; } !ge; } diff --git a/t/linkify.t b/t/linkify.t index e42e1efe..9280fd91 100644 --- a/t/linkify.t +++ b/t/linkify.t @@ -144,4 +144,9 @@ href="http://www.$hc.example.com/">http://www.$hc.example.com/}; is($s, $expect, 'IDN message escaped properly'); } +{ + my $false_positive = 'LINKIFY'.('A' x 40); + is(PublicInbox::Linkify->new->to_html($false_positive), + $false_positive, 'false-positive left as-is'); +} done_testing();