* [PATCH] linkify: implement Markdown link compatibility
@ 2016-12-06 23:01 Eric Wong
2016-12-06 23:17 ` Eric Wong
0 siblings, 1 reply; 3+ messages in thread
From: Eric Wong @ 2016-12-06 23:01 UTC (permalink / raw)
To: meta
Although unescaped parentheses in URLs are technically allowed,
they are uncommon. However, Markdown-like syntaxes are
unfortunately common for URLs, so we might as well support them.
---
lib/PublicInbox/Linkify.pm | 15 +++++++++++----
t/linkify.t | 9 +++++++++
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index ea7fd71..cc0f7e3 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -15,7 +15,7 @@ use warnings;
use Digest::SHA qw/sha1_hex/;
my $SALT = rand;
-my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
+my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher)://
[\@:\w\.-]+/
(?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
(?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
@@ -27,15 +27,22 @@ sub new { bless {}, shift }
sub linkify_1 {
my ($self, $s) = @_;
$s =~ s!$LINK_RE!
- my $url = $1;
+ my $beg = $1 || '';
+ my $url = $2;
my $end = '';
+ # Markdown compatibility:
+ if ($beg eq '(') {
+ $url =~ s/\)\z//;
+ $end = ')';
+ }
+
# it's fairly common to end URLs in messages with
# '.', ',' or ';' to denote the end of a statement;
# assume the intent was to end the statement/sentence
# in English
if ($url =~ s/([\.,;])\z//) {
- $end = $1;
+ $end = $1 . $end;
}
# salt this, as this could be exploited to show
@@ -45,7 +52,7 @@ sub linkify_1 {
# only escape ampersands, others do not match LINK_RE
$url =~ s/&/&amp;/g;
$self->{$key} = $url;
- 'PI-LINK-'. $key . $end;
+ $beg . 'PI-LINK-'. $key . $end;
!ge;
$s;
}
diff --git a/t/linkify.t b/t/linkify.t
index 49cbbd6..a794c78 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -57,4 +57,13 @@ use PublicInbox::Linkify;
is($s, qq(hello <a\nhref="$u">$u</a> world), "root + fragment");
}
+{
+ my $l = PublicInbox::Linkify->new;
+ my $u = 'http://example.com/';
+ my $s = "[markdown]($u)";
+ $s = $l->linkify_1($s);
+ $s = $l->linkify_2($s);
+ is($s, qq![markdown](<a\nhref="$u">$u</a>)!, 'markdown compatible');
+}
+
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] linkify: implement Markdown link compatibility
2016-12-06 23:01 [PATCH] linkify: implement Markdown link compatibility Eric Wong
@ 2016-12-06 23:17 ` Eric Wong
2016-12-06 23:42 ` [PATCH] linkify: implement Markdown link compatibility (again) Eric Wong
0 siblings, 1 reply; 3+ messages in thread
From: Eric Wong @ 2016-12-06 23:17 UTC (permalink / raw)
To: meta
Oops, reverted :x v2 coming.
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH] linkify: implement Markdown link compatibility (again)
2016-12-06 23:17 ` Eric Wong
@ 2016-12-06 23:42 ` Eric Wong
0 siblings, 0 replies; 3+ messages in thread
From: Eric Wong @ 2016-12-06 23:42 UTC (permalink / raw)
To: meta
Although unescaped parentheses in URLs are technically allowed,
they are uncommon. However, Markdown-like syntaxes are
unfortunately common for URLs, so we might as well support them.
This fixes parentheses detection at sentence endings, as seen
in practice on emails.
---
lib/PublicInbox/Linkify.pm | 14 ++++++++++----
t/linkify.t | 22 ++++++++++++++++++++++
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index ea7fd71..acd2a47 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -15,7 +15,7 @@ use warnings;
use Digest::SHA qw/sha1_hex/;
my $SALT = rand;
-my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
+my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher)://
[\@:\w\.-]+/
(?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
(?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
@@ -27,14 +27,20 @@ sub new { bless {}, shift }
sub linkify_1 {
my ($self, $s) = @_;
$s =~ s!$LINK_RE!
- my $url = $1;
+ my $beg = $1 || '';
+ my $url = $2;
my $end = '';
# it's fairly common to end URLs in messages with
# '.', ',' or ';' to denote the end of a statement;
# assume the intent was to end the statement/sentence
# in English
- if ($url =~ s/([\.,;])\z//) {
+ # Markdown compatibility:
+ if ($beg eq '(') {
+ if ($url =~ s/(\)[\.,;]?)\z//) {
+ $end = $1;
+ }
+ } elsif ($url =~ s/([\.,;])\z//) {
$end = $1;
}
@@ -45,7 +51,7 @@ sub linkify_1 {
# only escape ampersands, others do not match LINK_RE
$url =~ s/&/&amp;/g;
$self->{$key} = $url;
- 'PI-LINK-'. $key . $end;
+ $beg . 'PI-LINK-'. $key . $end;
!ge;
$s;
}
diff --git a/t/linkify.t b/t/linkify.t
index 49cbbd6..99acf17 100644
--- a/t/linkify.t
+++ b/t/linkify.t
@@ -57,4 +57,26 @@ use PublicInbox::Linkify;
is($s, qq(hello <a\nhref="$u">$u</a> world), "root + fragment");
}
+# Markdown compatibility
+{
+ my $l = PublicInbox::Linkify->new;
+ my $u = 'http://example.com/';
+ my $s = "[markdown]($u)";
+ $s = $l->linkify_1($s);
+ $s = $l->linkify_2($s);
+ is($s, qq![markdown](<a\nhref="$u">$u</a>)!, 'Markdown-compatible');
+
+ $s = qq![markdown]($u "title")!;
+ $s = $l->linkify_1($s);
+ $s = $l->linkify_2($s);
+ is($s, qq![markdown](<a\nhref="$u">$u</a> "title")!,
+ 'Markdown title compatible');
+
+ $s = qq![markdown]($u).!;
+ $s = $l->linkify_1($s);
+ $s = $l->linkify_2($s);
+ is($s, qq![markdown](<a\nhref="$u">$u</a>).!,
+ 'Markdown-compatible end of sentence');
+}
+
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-12-06 23:42 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-06 23:01 [PATCH] linkify: implement Markdown link compatibility Eric Wong
2016-12-06 23:17 ` Eric Wong
2016-12-06 23:42 ` [PATCH] linkify: implement Markdown link compatibility (again) Eric Wong
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).