* [PATCH 1/2] treewide: move glob2re to PublicInbox::Config
2023-03-17 20:31 [PATCH 0/2] globbing updates Eric Wong
@ 2023-03-17 20:31 ` Eric Wong
2023-03-17 20:31 ` [PATCH 2/2] config: glob2re supports `**' to match multiple path components Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-03-17 20:31 UTC (permalink / raw)
To: meta
It seems suitable for the config class since globs are a
config/option thing.
---
lib/PublicInbox/Config.pm | 34 ++++++++++++++++++++++++++++++
lib/PublicInbox/LeiExternal.pm | 36 +-------------------------------
lib/PublicInbox/LeiLsExternal.pm | 3 ++-
lib/PublicInbox/LeiLsMailSync.pm | 3 ++-
lib/PublicInbox/LeiMirror.pm | 6 +++---
t/config.t | 18 ++++++++++++++--
t/lei_external.t | 20 ++----------------
7 files changed, 60 insertions(+), 60 deletions(-)
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index cdf06d85..34abcea3 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -10,6 +10,8 @@
package PublicInbox::Config;
use strict;
use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(glob2re);
use PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup
@@ -577,4 +579,36 @@ sub squote_maybe ($) {
$val;
}
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+ '[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re ($) {
+ my ($re) = @_;
+ my $p = '';
+ my $in_bracket = 0;
+ my $qm = 0;
+ my $schema_host_port = '';
+
+ # don't glob URL-looking things that look like IPv6
+ if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
+ $schema_host_port = quotemeta $1; # "http://[::1]:1234"
+ }
+ my $changes = ($re =~ s!(.)!
+ $re_map{$p eq '\\' ? '' : do {
+ if ($1 eq '[') { ++$in_bracket }
+ elsif ($1 eq ']') { --$in_bracket }
+ elsif ($1 eq ',') { ++$qm } # no change
+ $p = $1;
+ }} // do {
+ $p = $1;
+ ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+ }!sge);
+ # bashism (also supported by curl): {a,b,c} => (a|b|c)
+ $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+ (my $in_braces = $2) =~ tr!,!|!;
+ $1."($in_braces)";
+ /sge);
+ ($changes - $qm) ? $schema_host_port.$re : undef;
+}
+
1;
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index a6562e7f..3e2a2288 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -5,7 +5,7 @@
package PublicInbox::LeiExternal;
use strict;
use v5.10.1;
-use PublicInbox::Config;
+use PublicInbox::Config qw(glob2re);
sub externals_each {
my ($self, $cb, @arg) = @_;
@@ -44,40 +44,6 @@ sub ext_canonicalize {
}
}
-# TODO: we will probably extract glob2re into a separate module for
-# PublicInbox::Filter::Base and maybe other places
-my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
- '[' => '[', ']' => ']', ',' => ',' );
-
-sub glob2re {
- my $re = $_[-1]; # $_[0] may be $lei
- my $p = '';
- my $in_bracket = 0;
- my $qm = 0;
- my $schema_host_port = '';
-
- # don't glob URL-looking things that look like IPv6
- if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
- $schema_host_port = quotemeta $1; # "http://[::1]:1234"
- }
- my $changes = ($re =~ s!(.)!
- $re_map{$p eq '\\' ? '' : do {
- if ($1 eq '[') { ++$in_bracket }
- elsif ($1 eq ']') { --$in_bracket }
- elsif ($1 eq ',') { ++$qm } # no change
- $p = $1;
- }} // do {
- $p = $1;
- ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
- }!sge);
- # bashism (also supported by curl): {a,b,c} => (a|b|c)
- $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
- (my $in_braces = $2) =~ tr!,!|!;
- $1."($in_braces)";
- /sge);
- ($changes - $qm) ? $schema_host_port.$re : undef;
-}
-
# get canonicalized externals list matching $loc
# $is_exclude denotes it's for --exclude
# otherwise it's for --only/--include is assumed
diff --git a/lib/PublicInbox/LeiLsExternal.pm b/lib/PublicInbox/LeiLsExternal.pm
index e624cbd4..2cdd0c4d 100644
--- a/lib/PublicInbox/LeiLsExternal.pm
+++ b/lib/PublicInbox/LeiLsExternal.pm
@@ -5,6 +5,7 @@
package PublicInbox::LeiLsExternal;
use strict;
use v5.10.1;
+use PublicInbox::Config qw(glob2re);
# TODO: does this need JSON output?
sub lei_ls_external {
@@ -12,7 +13,7 @@ sub lei_ls_external {
my $do_glob = !$lei->{opt}->{globoff}; # glob by default
my ($OFS, $ORS) = $lei->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
$filter //= '*';
- my $re = $do_glob ? $lei->glob2re($filter) : undef;
+ my $re = $do_glob ? glob2re($filter) : undef;
$re .= '/?\\z' if defined $re;
$re //= index($filter, '/') < 0 ?
qr!/\Q$filter\E/?\z! : # exact basename match
diff --git a/lib/PublicInbox/LeiLsMailSync.pm b/lib/PublicInbox/LeiLsMailSync.pm
index 8da0c284..1400d488 100644
--- a/lib/PublicInbox/LeiLsMailSync.pm
+++ b/lib/PublicInbox/LeiLsMailSync.pm
@@ -6,12 +6,13 @@ package PublicInbox::LeiLsMailSync;
use strict;
use v5.10.1;
use PublicInbox::LeiMailSync;
+use PublicInbox::Config qw(glob2re);
sub lei_ls_mail_sync {
my ($lei, $filter) = @_;
my $lms = $lei->lms or return;
my $opt = $lei->{opt};
- my $re = $opt->{globoff} ? undef : $lei->glob2re($filter // '*');
+ my $re = $opt->{globoff} ? undef : glob2re($filter // '*');
$re .= '/?\\z' if defined $re;
$re //= index($filter, '/') < 0 ?
qr!/\Q$filter\E/?\z! : # exact basename match
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 18932cf4..c8d28eba 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -14,7 +14,7 @@ use File::Spec ();
use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
use Carp qw(croak);
use URI;
-use PublicInbox::Config;
+use PublicInbox::Config qw(glob2re);
use PublicInbox::Inbox;
use PublicInbox::Git;
use PublicInbox::LeiCurl;
@@ -983,7 +983,7 @@ sub multi_inbox ($$$) {
my @orig = defined($incl // $excl) ? (keys %$v2, @v1) : ();
if (defined $incl) {
my $re = '(?:'.join('\\z|', map {
- $self->{lei}->glob2re($_) // qr/\A\Q$_\E/
+ glob2re($_) // qr/\A\Q$_\E/
} @$incl).'\\z)';
my @gone = delete @$v2{grep(!/$re/, keys %$v2)};
delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1;
@@ -992,7 +992,7 @@ sub multi_inbox ($$$) {
}
if (defined $excl) {
my $re = '(?:'.join('\\z|', map {
- $self->{lei}->glob2re($_) // qr/\A\Q$_\E/
+ glob2re($_) // qr/\A\Q$_\E/
} @$excl).'\\z)';
my @gone = delete @$v2{grep(/$re/, keys %$v2)};
delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1;
diff --git a/t/config.t b/t/config.t
index ba83e63f..d67931da 100644
--- a/t/config.t
+++ b/t/config.t
@@ -1,7 +1,6 @@
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
use PublicInbox::TestCommon;
use PublicInbox::Import;
use_ok 'PublicInbox';
@@ -260,5 +259,20 @@ EOF
is($cfg->urlmatch('imap.idleInterval', $url), undef, 'urlmatch miss');
};
+my $glob2re = PublicInbox::Config->can('glob2re');
+is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed');
+is($glob2re->('foo'), undef, 'plain string unchanged');
+is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted');
+is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted');
+is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces');
+is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start');
+is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end');
+is_deeply($glob2re->('{a}'), undef, 'ungrouped brace');
+is_deeply($glob2re->('{a'), undef, 'open left brace');
+is_deeply($glob2re->('a}'), undef, 'open right brace');
+is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
+is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
+is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
+is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
done_testing();
diff --git a/t/lei_external.t b/t/lei_external.t
index 51d0af5c..573cbc60 100644
--- a/t/lei_external.t
+++ b/t/lei_external.t
@@ -1,8 +1,8 @@
#!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# internal unit test, see t/lei-externals.t for functional tests
-use strict; use v5.10.1; use Test::More;
+use v5.12; use Test::More;
my $cls = 'PublicInbox::LeiExternal';
require_ok $cls;
my $canon = $cls->can('ext_canonicalize');
@@ -16,20 +16,4 @@ is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent',
is($canon->('/this//path/'), '/this/path', 'extra slashes gone');
is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved');
-my $glob2re = $cls->can('glob2re');
-is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed');
-is($glob2re->('foo'), undef, 'plain string unchanged');
-is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted');
-is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted');
-is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces');
-is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start');
-is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end');
-is_deeply($glob2re->('{a}'), undef, 'ungrouped brace');
-is_deeply($glob2re->('{a'), undef, 'open left brace');
-is_deeply($glob2re->('a}'), undef, 'open right brace');
-is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
-is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
-is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
-is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
-
done_testing;
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] config: glob2re supports `**' to match multiple path components
2023-03-17 20:31 [PATCH 0/2] globbing updates Eric Wong
2023-03-17 20:31 ` [PATCH 1/2] treewide: move glob2re to PublicInbox::Config Eric Wong
@ 2023-03-17 20:31 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-03-17 20:31 UTC (permalink / raw)
To: meta
This should match behavior documented in gitglossary(7)
---
lib/PublicInbox/Config.pm | 3 ++-
t/config.t | 5 +++++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 34abcea3..4065b256 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -580,6 +580,7 @@ sub squote_maybe ($) {
}
my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+ '/**' => '/.*', '**/' => '.*/', '/**/' => '/.*?',
'[' => '[', ']' => ']', ',' => ',' );
sub glob2re ($) {
@@ -593,7 +594,7 @@ sub glob2re ($) {
if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
$schema_host_port = quotemeta $1; # "http://[::1]:1234"
}
- my $changes = ($re =~ s!(.)!
+ my $changes = ($re =~ s!(/\*\*/|\A\*\*/|/\*\*\z|.)!
$re_map{$p eq '\\' ? '' : do {
if ($1 eq '[') { ++$in_bracket }
elsif ($1 eq ']') { --$in_bracket }
diff --git a/t/config.t b/t/config.t
index d67931da..80f214cd 100644
--- a/t/config.t
+++ b/t/config.t
@@ -274,5 +274,10 @@ is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
+is_deeply($glob2re->('**/foo'), '.*/foo', 'double asterisk start');
+is_deeply($glob2re->('foo/**'), 'foo/.*', 'double asterisk end');
+my $re = $glob2re->('a/**/b');
+is_deeply($re, 'a/.*?b', 'double asterisk middle');
+like($_, qr!$re!, "a/**/b matches $_") for ('a/b', 'a/c/b', 'a/c/a/b');
done_testing();
^ permalink raw reply related [flat|nested] 3+ messages in thread