unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] globbing updates
@ 2023-03-17 20:31 Eric Wong
  2023-03-17 20:31 ` [PATCH 1/2] treewide: move glob2re to PublicInbox::Config Eric Wong
  2023-03-17 20:31 ` [PATCH 2/2] config: glob2re supports `**' to match multiple path components Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2023-03-17 20:31 UTC (permalink / raw)
  To: meta

The code indexer will be using this to implement --exclude= when
combined with --project-list=, at least (because indexing
certain `code' repos takes forever).

Eric Wong (2):
  treewide: move glob2re to PublicInbox::Config
  config: glob2re supports `**' to match multiple path components

 lib/PublicInbox/Config.pm        | 35 +++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiExternal.pm   | 36 +-------------------------------
 lib/PublicInbox/LeiLsExternal.pm |  3 ++-
 lib/PublicInbox/LeiLsMailSync.pm |  3 ++-
 lib/PublicInbox/LeiMirror.pm     |  6 +++---
 t/config.t                       | 23 ++++++++++++++++++--
 t/lei_external.t                 | 20 ++----------------
 7 files changed, 66 insertions(+), 60 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] treewide: move glob2re to PublicInbox::Config
  2023-03-17 20:31 [PATCH 0/2] globbing updates Eric Wong
@ 2023-03-17 20:31 ` Eric Wong
  2023-03-17 20:31 ` [PATCH 2/2] config: glob2re supports `**' to match multiple path components Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-03-17 20:31 UTC (permalink / raw)
  To: meta

It seems suitable for the config class since globs are a
config/option thing.
---
 lib/PublicInbox/Config.pm        | 34 ++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiExternal.pm   | 36 +-------------------------------
 lib/PublicInbox/LeiLsExternal.pm |  3 ++-
 lib/PublicInbox/LeiLsMailSync.pm |  3 ++-
 lib/PublicInbox/LeiMirror.pm     |  6 +++---
 t/config.t                       | 18 ++++++++++++++--
 t/lei_external.t                 | 20 ++----------------
 7 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index cdf06d85..34abcea3 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -10,6 +10,8 @@
 package PublicInbox::Config;
 use strict;
 use v5.10.1;
+use parent qw(Exporter);
+our @EXPORT_OK = qw(glob2re);
 use PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd);
 our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup
@@ -577,4 +579,36 @@ sub squote_maybe ($) {
 	$val;
 }
 
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+		'[' => '[', ']' => ']', ',' => ',' );
+
+sub glob2re ($) {
+	my ($re) = @_;
+	my $p = '';
+	my $in_bracket = 0;
+	my $qm = 0;
+	my $schema_host_port = '';
+
+	# don't glob URL-looking things that look like IPv6
+	if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
+		$schema_host_port = quotemeta $1; # "http://[::1]:1234"
+	}
+	my $changes = ($re =~ s!(.)!
+		$re_map{$p eq '\\' ? '' : do {
+			if ($1 eq '[') { ++$in_bracket }
+			elsif ($1 eq ']') { --$in_bracket }
+			elsif ($1 eq ',') { ++$qm } # no change
+			$p = $1;
+		}} // do {
+			$p = $1;
+			($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+		}!sge);
+	# bashism (also supported by curl): {a,b,c} => (a|b|c)
+	$changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+			(my $in_braces = $2) =~ tr!,!|!;
+			$1."($in_braces)";
+			/sge);
+	($changes - $qm) ? $schema_host_port.$re : undef;
+}
+
 1;
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index a6562e7f..3e2a2288 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -5,7 +5,7 @@
 package PublicInbox::LeiExternal;
 use strict;
 use v5.10.1;
-use PublicInbox::Config;
+use PublicInbox::Config qw(glob2re);
 
 sub externals_each {
 	my ($self, $cb, @arg) = @_;
@@ -44,40 +44,6 @@ sub ext_canonicalize {
 	}
 }
 
-# TODO: we will probably extract glob2re into a separate module for
-# PublicInbox::Filter::Base and maybe other places
-my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
-		'[' => '[', ']' => ']', ',' => ',' );
-
-sub glob2re {
-	my $re = $_[-1]; # $_[0] may be $lei
-	my $p = '';
-	my $in_bracket = 0;
-	my $qm = 0;
-	my $schema_host_port = '';
-
-	# don't glob URL-looking things that look like IPv6
-	if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
-		$schema_host_port = quotemeta $1; # "http://[::1]:1234"
-	}
-	my $changes = ($re =~ s!(.)!
-		$re_map{$p eq '\\' ? '' : do {
-			if ($1 eq '[') { ++$in_bracket }
-			elsif ($1 eq ']') { --$in_bracket }
-			elsif ($1 eq ',') { ++$qm } # no change
-			$p = $1;
-		}} // do {
-			$p = $1;
-			($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
-		}!sge);
-	# bashism (also supported by curl): {a,b,c} => (a|b|c)
-	$changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
-			(my $in_braces = $2) =~ tr!,!|!;
-			$1."($in_braces)";
-			/sge);
-	($changes - $qm) ? $schema_host_port.$re : undef;
-}
-
 # get canonicalized externals list matching $loc
 # $is_exclude denotes it's for --exclude
 # otherwise it's for --only/--include is assumed
diff --git a/lib/PublicInbox/LeiLsExternal.pm b/lib/PublicInbox/LeiLsExternal.pm
index e624cbd4..2cdd0c4d 100644
--- a/lib/PublicInbox/LeiLsExternal.pm
+++ b/lib/PublicInbox/LeiLsExternal.pm
@@ -5,6 +5,7 @@
 package PublicInbox::LeiLsExternal;
 use strict;
 use v5.10.1;
+use PublicInbox::Config qw(glob2re);
 
 # TODO: does this need JSON output?
 sub lei_ls_external {
@@ -12,7 +13,7 @@ sub lei_ls_external {
 	my $do_glob = !$lei->{opt}->{globoff}; # glob by default
 	my ($OFS, $ORS) = $lei->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
 	$filter //= '*';
-	my $re = $do_glob ? $lei->glob2re($filter) : undef;
+	my $re = $do_glob ? glob2re($filter) : undef;
 	$re .= '/?\\z' if defined $re;
 	$re //= index($filter, '/') < 0 ?
 			qr!/\Q$filter\E/?\z! : # exact basename match
diff --git a/lib/PublicInbox/LeiLsMailSync.pm b/lib/PublicInbox/LeiLsMailSync.pm
index 8da0c284..1400d488 100644
--- a/lib/PublicInbox/LeiLsMailSync.pm
+++ b/lib/PublicInbox/LeiLsMailSync.pm
@@ -6,12 +6,13 @@ package PublicInbox::LeiLsMailSync;
 use strict;
 use v5.10.1;
 use PublicInbox::LeiMailSync;
+use PublicInbox::Config qw(glob2re);
 
 sub lei_ls_mail_sync {
 	my ($lei, $filter) = @_;
 	my $lms = $lei->lms or return;
 	my $opt = $lei->{opt};
-	my $re = $opt->{globoff} ? undef : $lei->glob2re($filter // '*');
+	my $re = $opt->{globoff} ? undef : glob2re($filter // '*');
 	$re .= '/?\\z' if defined $re;
 	$re //= index($filter, '/') < 0 ?
 			qr!/\Q$filter\E/?\z! : # exact basename match
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 18932cf4..c8d28eba 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -14,7 +14,7 @@ use File::Spec ();
 use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
 use Carp qw(croak);
 use URI;
-use PublicInbox::Config;
+use PublicInbox::Config qw(glob2re);
 use PublicInbox::Inbox;
 use PublicInbox::Git;
 use PublicInbox::LeiCurl;
@@ -983,7 +983,7 @@ sub multi_inbox ($$$) {
 	my @orig = defined($incl // $excl) ? (keys %$v2, @v1) : ();
 	if (defined $incl) {
 		my $re = '(?:'.join('\\z|', map {
-				$self->{lei}->glob2re($_) // qr/\A\Q$_\E/
+				glob2re($_) // qr/\A\Q$_\E/
 			} @$incl).'\\z)';
 		my @gone = delete @$v2{grep(!/$re/, keys %$v2)};
 		delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1;
@@ -992,7 +992,7 @@ sub multi_inbox ($$$) {
 	}
 	if (defined $excl) {
 		my $re = '(?:'.join('\\z|', map {
-				$self->{lei}->glob2re($_) // qr/\A\Q$_\E/
+				glob2re($_) // qr/\A\Q$_\E/
 			} @$excl).'\\z)';
 		my @gone = delete @$v2{grep(/$re/, keys %$v2)};
 		delete @$m{map { @$_ } @gone} and $self->{chg}->{manifest} = 1;
diff --git a/t/config.t b/t/config.t
index ba83e63f..d67931da 100644
--- a/t/config.t
+++ b/t/config.t
@@ -1,7 +1,6 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use v5.10.1;
+use v5.12;
 use PublicInbox::TestCommon;
 use PublicInbox::Import;
 use_ok 'PublicInbox';
@@ -260,5 +259,20 @@ EOF
 	is($cfg->urlmatch('imap.idleInterval', $url), undef, 'urlmatch miss');
 };
 
+my $glob2re = PublicInbox::Config->can('glob2re');
+is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed');
+is($glob2re->('foo'), undef, 'plain string unchanged');
+is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted');
+is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted');
+is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces');
+is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start');
+is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end');
+is_deeply($glob2re->('{a}'), undef, 'ungrouped brace');
+is_deeply($glob2re->('{a'), undef, 'open left brace');
+is_deeply($glob2re->('a}'), undef, 'open right brace');
+is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
+is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
+is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
+is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
 
 done_testing();
diff --git a/t/lei_external.t b/t/lei_external.t
index 51d0af5c..573cbc60 100644
--- a/t/lei_external.t
+++ b/t/lei_external.t
@@ -1,8 +1,8 @@
 #!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # internal unit test, see t/lei-externals.t for functional tests
-use strict; use v5.10.1; use Test::More;
+use v5.12; use Test::More;
 my $cls = 'PublicInbox::LeiExternal';
 require_ok $cls;
 my $canon = $cls->can('ext_canonicalize');
@@ -16,20 +16,4 @@ is($canon->('/this/path/is/nonexistent/'), '/this/path/is/nonexistent',
 is($canon->('/this//path/'), '/this/path', 'extra slashes gone');
 is($canon->('/ALL/CAPS'), '/ALL/CAPS', 'caps preserved');
 
-my $glob2re = $cls->can('glob2re');
-is($glob2re->('http://[::1]:1234/foo/'), undef, 'IPv6 URL not globbed');
-is($glob2re->('foo'), undef, 'plain string unchanged');
-is_deeply($glob2re->('[f-o]'), '[f-o]' , 'range accepted');
-is_deeply($glob2re->('*'), '[^/]*?' , 'wildcard accepted');
-is_deeply($glob2re->('{a,b,c}'), '(a|b|c)' , 'braces');
-is_deeply($glob2re->('{,b,c}'), '(|b|c)' , 'brace with empty @ start');
-is_deeply($glob2re->('{a,b,}'), '(a|b|)' , 'brace with empty @ end');
-is_deeply($glob2re->('{a}'), undef, 'ungrouped brace');
-is_deeply($glob2re->('{a'), undef, 'open left brace');
-is_deeply($glob2re->('a}'), undef, 'open right brace');
-is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
-is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
-is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
-is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
-
 done_testing;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] config: glob2re supports `**' to match multiple path components
  2023-03-17 20:31 [PATCH 0/2] globbing updates Eric Wong
  2023-03-17 20:31 ` [PATCH 1/2] treewide: move glob2re to PublicInbox::Config Eric Wong
@ 2023-03-17 20:31 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-03-17 20:31 UTC (permalink / raw)
  To: meta

This should match the behavior documented in gitglossary(7).
---
 lib/PublicInbox/Config.pm | 3 ++-
 t/config.t                | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 34abcea3..4065b256 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -580,6 +580,7 @@ sub squote_maybe ($) {
 }
 
 my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+		'/**' => '/.*', '**/' => '.*/', '/**/' => '/.*?',
 		'[' => '[', ']' => ']', ',' => ',' );
 
 sub glob2re ($) {
@@ -593,7 +594,7 @@ sub glob2re ($) {
 	if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
 		$schema_host_port = quotemeta $1; # "http://[::1]:1234"
 	}
-	my $changes = ($re =~ s!(.)!
+	my $changes = ($re =~ s!(/\*\*/|\A\*\*/|/\*\*\z|.)!
 		$re_map{$p eq '\\' ? '' : do {
 			if ($1 eq '[') { ++$in_bracket }
 			elsif ($1 eq ']') { --$in_bracket }
diff --git a/t/config.t b/t/config.t
index d67931da..80f214cd 100644
--- a/t/config.t
+++ b/t/config.t
@@ -274,5 +274,10 @@ is_deeply($glob2re->('*.[ch]'), '[^/]*?\\.[ch]', 'suffix glob');
 is_deeply($glob2re->('{[a-z],9,}'), '([a-z]|9|)' , 'brace with range');
 is_deeply($glob2re->('\\{a,b\\}'), undef, 'escaped brace');
 is_deeply($glob2re->('\\\\{a,b}'), '\\\\\\\\(a|b)', 'fake escape brace');
+is_deeply($glob2re->('**/foo'), '.*/foo', 'double asterisk start');
+is_deeply($glob2re->('foo/**'), 'foo/.*', 'double asterisk end');
+my $re = $glob2re->('a/**/b');
+is_deeply($re, 'a/.*?b', 'double asterisk middle');
+like($_, qr!$re!, "a/**/b matches $_") for ('a/b', 'a/c/b', 'a/c/a/b');
 
 done_testing();

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-03-17 20:31 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-17 20:31 [PATCH 0/2] globbing updates Eric Wong
2023-03-17 20:31 ` [PATCH 1/2] treewide: move glob2re to PublicInbox::Config Eric Wong
2023-03-17 20:31 ` [PATCH 2/2] config: glob2re supports `**' to match multiple path components Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).