From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.4 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, RP_MATCHES_RCVD shortcircuit=no autolearn=no version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 1C5A21F42D; Mon, 15 Sep 2014 02:46:13 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Cc: Eric Wong Subject: [PATCH] view: support SHA-1 of Message-IDs for message links Date: Mon, 15 Sep 2014 02:46:12 +0000 Message-Id: <1410749172-20012-1-git-send-email-e@80x24.org> X-Mailer: git-send-email 2.1.0.243.g30d45f7 List-Id: Some Message-IDs are crazy long, so support SHA-1s for them instead. This allows shorter URLs to be generated, which are less likely to break. However, we'll still favor short Message-IDs whenever possible. --- lib/PublicInbox/Hval.pm | 22 +++++++++++++++------- lib/PublicInbox/WWW.pm | 16 +++++++++++----- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index a93e8c2..9be163a 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -5,28 +5,36 @@ package PublicInbox::Hval; use strict; use warnings; -use fields qw(raw); +use fields qw(raw href); use Encode qw(find_encoding); use URI::Escape qw(uri_escape_utf8); my $enc_ascii = find_encoding('us-ascii'); sub new { - my ($class, $raw) = @_; + my ($class, $raw, $href) = @_; my $self = fields::new($class); # we never care about leading/trailing whitespace $raw =~ s/\A\s*//; $raw =~ s/\s*\z//; $self->{raw} = $raw; + $self->{href} = defined $href ? 
$href : $raw; $self; } sub new_msgid { - my ($class, $raw) = @_; - $raw =~ s/\A<//; - $raw =~ s/>\z//; - $class->new($raw); + my ($class, $msgid) = @_; + $msgid =~ s/\A\s*<?//; + $msgid =~ s/>?\s*\z//; + + if (length($msgid) <= 40) { + $class->new($msgid); + } else { + require Digest::SHA; + my $hex = Digest::SHA::sha1_hex($msgid); + $class->new($msgid, $hex); + } } sub new_oneline { @@ -52,7 +60,7 @@ sub ascii_html { } sub as_html { ascii_html($_[0]->{raw}) } -sub as_href { ascii_html(uri_escape_utf8($_[0]->{raw})) } +sub as_href { ascii_html(uri_escape_utf8($_[0]->{href})) } sub raw { if (defined $_[1]) { diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index f67e72c..1814286 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -130,13 +130,19 @@ sub get_index { # just returns a string ref for the blob in the current ctx sub mid2blob { my ($ctx) = @_; - require Digest::SHA; - my $hex = Digest::SHA::sha1_hex($ctx->{mid}); - $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or - die "BUG: not a SHA-1 hex: $hex"; + my $hex = $ctx->{mid}; + my ($x2, $x38) = ($hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/); + + unless (defined $x38) { + # compatibility with old links + require Digest::SHA; + $hex = Digest::SHA::sha1_hex($hex); + ($x2, $x38) = ($hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/); + defined $x38 or die "BUG: not a SHA-1 hex: $hex"; + } my @cmd = ('git', "--git-dir=$ctx->{git_dir}", - qw(cat-file blob), "HEAD:$1/$2"); + qw(cat-file blob), "HEAD:$x2/$x38"); my $cmd = join(' ', @cmd); my $pid = open my $fh, '-|'; defined $pid or die "fork failed: $!\n"; -- EW