From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: AS63949 64.71.152.0/24 X-Spam-Status: No, score=-2.2 required=3.0 tests=BAYES_00,RCVD_IN_DNSWL_NONE, RDNS_NONE,SPF_HELO_PASS,SPF_PASS shortcircuit=no autolearn=no autolearn_force=no version=3.4.0 Received: from 80x24.org (unknown [64.71.152.64]) by dcvr.yhbt.net (Postfix) with ESMTP id 350D81F404 for ; Thu, 21 Dec 2017 01:00:12 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] view: avoid deduping a single word in subject skeletons Date: Thu, 21 Dec 2017 01:00:12 +0000 Message-Id: <20171221010012.23680-1-e@80x24.org> List-Id: It is usually pointless to replace a single word with a '"' character. --- lib/PublicInbox/View.pm | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 41f1321..81e83d9 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -755,6 +755,25 @@ sub _msg_date { sub fmt_ts { POSIX::strftime('%Y-%m-%d %k:%M', gmtime($_[0])) } +sub dedupe_subject { + my ($prev_subj, $subj, $val) = @_; + + my $omit = ''; # '"' denotes identical text omitted + my (@prev_pop, @curr_pop); + while (@$prev_subj && @$subj && $subj->[-1] eq $prev_subj->[-1]) { + push(@prev_pop, pop(@$prev_subj)); + push(@curr_pop, pop(@$subj)); + $omit ||= $val; + } + pop @$subj if @$subj && $subj->[-1] =~ /^re:\s*/i; + if (scalar(@curr_pop) == 1) { + $omit = ''; + push @$prev_subj, @prev_pop; + push @$subj, @curr_pop; + } + $omit; +} + sub skel_dump { my ($ctx, $level, $node) = @_; my $smsg = $node->{smsg} or return _skel_ghost($ctx, $level, $node); @@ -798,13 +817,7 @@ sub skel_dump { # so we do not show redundant text at the end. my $prev_subj = $ctx->{prev_subj} || []; $ctx->{prev_subj} = [ @subj ]; - my $omit = ''; # '"' denotes identical text omitted - while (@$prev_subj && @subj && $subj[-1] eq $prev_subj->[-1]) { - pop @$prev_subj; - pop @subj; - $omit ||= '" '; - } - pop @subj if @subj && $subj[-1] =~ /^re:\s*/i; + my $omit = dedupe_subject($prev_subj, \@subj, '" '); my $end; if (@subj) { my $subj = join(' ', @subj); @@ -944,14 +957,7 @@ sub dump_topics { $mid = delete $seen->{$subj}; my @subj = split(/ /, $srch->subject_normalized($subj)); my @next_prev = @subj; # full copy - my $omit = ''; # '"' denotes identical text omitted - while (@$prev_subj && @subj && - $subj[-1] eq $prev_subj->[-1]) { - pop @$prev_subj; - pop @subj; - $omit ||= ' "'; - } - pop @subj if @subj && $subj[-1] =~ /^re:\s*/i; + my $omit = dedupe_subject($prev_subj, \@subj, ' "'); $prev_subj = \@next_prev; $subj = ascii_html(join(' ', @subj)); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; -- EW