From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2A3FD1F43D for ; Fri, 17 Mar 2023 20:31:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1679085098; bh=VxYn3cxK97Gm1pdsfYL4oDVs3DlEcNElJB5OYzw1G2Q=; h=From:To:Subject:Date:In-Reply-To:References:From; b=yKmOIyhOwtjmrMffc4EBh9yofYWqgarnoQyODvD7HpSBiiWJwghzdaQigVpNGaowD 9Iq5NXDpl6uUVI08erXQfcP1sEuFQvofHr5KTsIXHPcuk0+8YTwEEr3+l1APb3/rIg vGPD7XWok9L+JCKjCx/N8GKkeQGOrslSVRyuE0E0= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/2] treewide: move glob2re to PublicInbox::Config Date: Fri, 17 Mar 2023 20:31:36 +0000 Message-Id: <20230317203137.1373098-2-e@80x24.org> In-Reply-To: <20230317203137.1373098-1-e@80x24.org> References: <20230317203137.1373098-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: It seems suitable for the config class since globs are a config/option thing. --- lib/PublicInbox/Config.pm | 34 ++++++++++++++++++++++++++++++ lib/PublicInbox/LeiExternal.pm | 36 +------------------------------- lib/PublicInbox/LeiLsExternal.pm | 3 ++- lib/PublicInbox/LeiLsMailSync.pm | 3 ++- lib/PublicInbox/LeiMirror.pm | 6 +++--- t/config.t | 18 ++++++++++++++-- t/lei_external.t | 20 ++---------------- 7 files changed, 60 insertions(+), 60 deletions(-) diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index cdf06d85..34abcea3 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -10,6 +10,8 @@ package PublicInbox::Config; use strict; use v5.10.1; +use parent qw(Exporter); +our @EXPORT_OK = qw(glob2re); use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup @@ -577,4 +579,36 @@ sub squote_maybe ($) { $val; } +my %re_map = ( '*' => '[^/]*?', '?' => '[^/]', + '[' => '[', ']' => ']', ',' => ',' ); + +sub glob2re ($) { + my ($re) = @_; + my $p = ''; + my $in_bracket = 0; + my $qm = 0; + my $schema_host_port = ''; + + # don't glob URL-looking things that look like IPv6 + if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) { + $schema_host_port = quotemeta $1; # "http://[::1]:1234" + } + my $changes = ($re =~ s!(.)! + $re_map{$p eq '\\' ? '' : do { + if ($1 eq '[') { ++$in_bracket } + elsif ($1 eq ']') { --$in_bracket } + elsif ($1 eq ',') { ++$qm } # no change + $p = $1; + }} // do { + $p = $1; + ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p") + }!sge); + # bashism (also supported by curl): {a,b,c} => (a|b|c) + $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/ + (my $in_braces = $2) =~ tr!,!|!; + $1."($in_braces)"; + /sge); + ($changes - $qm) ? $schema_host_port.$re : undef; +} + 1; diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index a6562e7f..3e2a2288 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -5,7 +5,7 @@ package PublicInbox::LeiExternal; use strict; use v5.10.1; -use PublicInbox::Config; +use PublicInbox::Config qw(glob2re); sub externals_each { my ($self, $cb, @arg) = @_; @@ -44,40 +44,6 @@ sub ext_canonicalize { } } -# TODO: we will probably extract glob2re into a separate module for -# PublicInbox::Filter::Base and maybe other places -my %re_map = ( '*' => '[^/]*?', '?' => '[^/]', - '[' => '[', ']' => ']', ',' => ',' ); - -sub glob2re { - my $re = $_[-1]; # $_[0] may be $lei - my $p = ''; - my $in_bracket = 0; - my $qm = 0; - my $schema_host_port = ''; - - # don't glob URL-looking things that look like IPv6 - if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) { - $schema_host_port = quotemeta $1; # "http://[::1]:1234" - } - my $changes = ($re =~ s!(.)! - $re_map{$p eq '\\' ? '' : do { - if ($1 eq '[') { ++$in_bracket } - elsif ($1 eq ']') { --$in_bracket } - elsif ($1 eq ',') { ++$qm } # no change - $p = $1; - }} // do { - $p = $1; - ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p") - }!sge); - # bashism (also supported by curl): {a,b,c} => (a|b|c) - $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/ - (my $in_braces = $2) =~ tr!,!|!; - $1."($in_braces)"; - /sge); - ($changes - $qm) ? $schema_host_port.$re : undef; -} - # get canonicalized externals list matching $loc # $is_exclude denotes it's for --exclude # otherwise it's for --only/--include is assumed diff --git a/lib/PublicInbox/LeiLsExternal.pm b/lib/PublicInbox/LeiLsExternal.pm index e624cbd4..2cdd0c4d 100644 --- a/lib/PublicInbox/LeiLsExternal.pm +++ b/lib/PublicInbox/LeiLsExternal.pm @@ -5,6 +5,7 @@ package PublicInbox::LeiLsExternal; use strict; use v5.10.1; +use PublicInbox::Config qw(glob2re); # TODO: does this need JSON output? sub lei_ls_external { @@ -12,7 +13,7 @@ sub lei_ls_external { my $do_glob = !$lei->{opt}->{globoff}; # glob by default my ($OFS, $ORS) = $lei->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n"); $filter //= '*'; - my $re = $do_glob ? $lei->glob2re($filter) : undef; + my $re = $do_glob ? glob2re($filter) : undef; $re .= '/?\\z' if defined $re; $re //= index($filter, '/') < 0 ? qr!/\Q$filter\E/?\z! : # exact basename match diff --git a/lib/PublicInbox/LeiLsMailSync.pm b/lib/PublicInbox/LeiLsMailSync.pm index 8da0c284..1400d488 100644 --- a/lib/PublicInbox/LeiLsMailSync.pm +++ b/lib/PublicInbox/LeiLsMailSync.pm @@ -6,12 +6,13 @@ package PublicInbox::LeiLsMailSync; use strict; use v5.10.1; use PublicInbox::LeiMailSync; +use PublicInbox::Config qw(glob2re); sub lei_ls_mail_sync { my ($lei, $filter) = @_; my $lms = $lei->lms or return; my $opt = $lei->{opt}; - my $re = $opt->{globoff} ? undef : $lei->glob2re($filter // '*'); + my $re = $opt->{globoff} ? undef : glob2re($filter // '*'); $re .= '/?\\z' if defined $re; $re //= index($filter, '/') < 0 ? qr!/\Q$filter\E/?\z! : # exact basename match diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 18932cf4..c8d28eba 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -14,7 +14,7 @@ use File::Spec (); use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY); use Carp qw(croak); use URI; -use PublicInbox::Config; +use PublicInbox::Config qw(glob2re); use PublicInbox::Inbox; use PublicInbox::Git; use PublicInbox::LeiCurl; @@ -983,7 +983,7 @@ sub multi_inbox ($$$) { my @orig = defined($incl // $excl) ? (keys %$v2, @v1) : (); if (defined $incl) { my $re = '(?:'.join('\\z|', map { - $self->{lei}->glob2re($_) // qr/\A\Q$_\E/ + glob2re($_) // qr/\A\Q$_\E/ } @$incl).'\\z)'; my @gone = delete @$v2{grep(!/$re/, keys %$v2)}; delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1; @@ -992,7 +992,7 @@ sub multi_inbox ($$$) { } if (defined $excl) { my $re = '(?:'.join('\\z|', map { - $self->{lei}->glob2re($_) // qr/\A\Q$_\E/ + glob2re($_) // qr/\A\Q$_\E/ } @$excl).'\\z)'; my @gone = delete @$v2{grep(/$re/, keys %$v2)}; delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1; diff --git a/t/config.t b/t/config.t index ba83e63f..d67931da 100644 --- a/t/config.t +++ b/t/config.t @@ -1,7 +1,6 @@ # Copyright (C) all contributors # License: AGPL-3.0+ -use strict; -use v5.10.1; +use v5.12; use PublicInbox::TestCommon; use PublicInbox::Import; use_ok 'PublicInbox'; @@ -260,5 +259,20 @@ EOF is($cfg->urlmatch('imap.idleInterval', $url), undef, 'urlmatch miss'); }; +my $glob2re = PublicInbox::Config->can('glob2re'); +is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed'); +is($glob2re->('foo'), undef, 'plain string unchanged'); +is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted'); +is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted'); +is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces'); +is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start'); +is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end'); +is_deeply($glob2re->('{a}'), undef, 'ungrouped brace'); +is_deeply($glob2re->('{a'), undef, 'open left brace'); +is_deeply($glob2re->('a}'), undef, 'open right brace'); +is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob'); +is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range'); +is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace'); +is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace'); done_testing(); diff --git a/t/lei_external.t b/t/lei_external.t index 51d0af5c..573cbc60 100644 --- a/t/lei_external.t +++ b/t/lei_external.t @@ -1,8 +1,8 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # internal unit test, see t/lei-externals.t for functional tests -use strict; use v5.10.1; use Test::More; +use v5.12; use Test::More; my $cls = 'PublicInbox::LeiExternal'; require_ok $cls; my $canon = $cls->can('ext_canonicalize'); @@ -16,20 +16,4 @@ is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent', is($canon->('/this//path/'), '/this/path', 'extra slashes gone'); is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved'); -my $glob2re = $cls->can('glob2re'); -is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed'); -is($glob2re->('foo'), undef, 'plain string unchanged'); -is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted'); -is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted'); -is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces'); -is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start'); -is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end'); -is_deeply($glob2re->('{a}'), undef, 'ungrouped brace'); -is_deeply($glob2re->('{a'), undef, 'open left brace'); -is_deeply($glob2re->('a}'), undef, 'open right brace'); -is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob'); -is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range'); -is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace'); -is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace'); - done_testing;