From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.8 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, T_RP_MATCHES_RCVD shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 77BCA63384D for ; Sat, 3 Oct 2015 11:14:13 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/4] rename mid_compress to id_compress Date: Sat, 3 Oct 2015 11:14:09 +0000 Message-Id: <20151003111411.1003-3-e@80x24.org> In-Reply-To: <20151003111411.1003-1-e@80x24.org> References: <20151003111411.1003-1-e@80x24.org> List-Id: We use it as a general compressor for identifiers such as subject paths, so using the "mid_" prefix probably is not appropriate. --- lib/PublicInbox/MID.pm | 19 +++++++------------ lib/PublicInbox/Search.pm | 6 +++--- lib/PublicInbox/SearchIdx.pm | 4 ++-- lib/PublicInbox/View.pm | 4 ++-- t/view.t | 11 +++++------ 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 677a9d5..3d404ef 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -4,7 +4,7 @@ package PublicInbox::MID; use strict; use warnings; use base qw/Exporter/; -our @EXPORT_OK = qw/mid_clean mid_compress mid2path/; +our @EXPORT_OK = qw/mid_clean id_compress mid2path/; use Digest::SHA qw/sha1_hex/; use constant MID_MAX => 40; # SHA-1 hex length @@ -19,18 +19,13 @@ sub mid_clean { } # this is idempotent -sub mid_compress { - my ($mid, $force) = @_; +sub id_compress { + my ($id, $force) = @_; - # XXX dirty hack! FIXME! - # Some HTTP servers (apache2 2.2.22-13+deb7u5 on my system) - # apparently do not handle "%25" in the URL path component correctly. - # I'm not yet sure if it's something weird with my rewrite rules - # or what; will need to debug... - return sha1_hex($mid) if (index($mid, '%') >= 0); - - return $mid if (!$force && length($mid) <= MID_MAX); - sha1_hex($mid); + if ($force || $id =~ /[^\w\-]/ || length($id) > MID_MAX) { + return sha1_hex($id); + } + $id; } sub mid2path { diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 1d13f4b..fbc6882 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -14,7 +14,7 @@ use constant LINES => 3; # :lines as defined in RFC 3977 use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; use Email::MIME; -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean id_compress/; # This is English-only, everything else is non-standard and may be confused as # a prefix common in patch emails @@ -25,7 +25,7 @@ use constant { # SCHEMA_VERSION history # 0 - initial # 1 - subject_path is lower-cased - # 2 - subject_path is mid_compress in the index, only + # 2 - subject_path is id_compress in the index, only # 3 - message-ID is compressed if it includes '%' (hack!) # 4 - change "Re: " normalization, avoid circular Reference ghosts # 5 - subject_path drops trailing '.' @@ -104,7 +104,7 @@ sub get_thread { return { total => 0, msgs => [] } unless $smsg; my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id); - my $path = mid_compress($smsg->path); + my $path = id_compress($smsg->path); my $qsub = Search::Xapian::Query->new(xpfx('path').$path); my $query = Search::Xapian::Query->new(OP_OR, $qtid, $qsub); $self->do_enquire($query, $opts); diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f98ba3e..8184dc7 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -5,7 +5,7 @@ package PublicInbox::SearchIdx; use strict; use warnings; use base qw(PublicInbox::Search); -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean id_compress/; *xpfx = *PublicInbox::Search::xpfx; use constant { @@ -81,7 +81,7 @@ sub add_message { if ($subj ne '') { my $path = $self->subject_path($subj); - $doc->add_term(xpfx('path') . mid_compress($path)); + $doc->add_term(xpfx('path') . id_compress($path)); } add_val($doc, &PublicInbox::Search::TS, $smsg->ts); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index ccdcde2..c9be770 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -9,7 +9,7 @@ use Encode qw/find_encoding/; use Encode::MIME::Header; use Email::MIME::ContentType qw/parse_content_type/; use PublicInbox::Hval; -use PublicInbox::MID qw/mid_clean mid_compress mid2path/; +use PublicInbox::MID qw/mid_clean id_compress mid2path/; use Digest::SHA qw/sha1_hex/; my $SALT = rand; require POSIX; @@ -586,7 +586,7 @@ sub anchor_for { my ($msgid) = @_; my $id = $msgid; if ($id !~ /\A[a-f0-9]{40}\z/) { - $id = mid_compress(mid_clean($id), 1); + $id = id_compress(mid_clean($id), 1); } 'm' . $id; } diff --git a/t/view.t b/t/view.t index 325f509..568ab30 100644 --- a/t/view.t +++ b/t/view.t @@ -145,13 +145,12 @@ EOF like($html, qr/\bhi = bye\b/, "HTML output decoded QP"); } - -{ # XXX dirty hack - use PublicInbox::MID qw/mid_compress/; - like(mid_compress('foo%bar@wtf'), qr/\A[a-f0-9]{40}\z/, +{ + use PublicInbox::MID qw/id_compress/; + like(id_compress('foo%bar@wtf'), qr/\A[a-f0-9]{40}\z/, "percent always converted to sha1 to workaround buggy httpds"); - is(mid_compress('foobar@wtf'), 'foobar@wtf', - 'regular MID not compressed'); + is(id_compress('foobar-wtf'), 'foobar-wtf', + 'regular ID not compressed'); } done_testing(); -- EW