unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [abandoned/WIP]: use libgit2 to parse configs
Date: Thu, 11 Jan 2024 22:36:08 +0000	[thread overview]
Message-ID: <20240111223608.M380280@dcvr> (raw)

Publishing for documentation purposes
This was intended to avoid vfork+execve overhead, but
error handling seems wonky and giving libgit2 a bad config
file results in something like:
   perl: symbol lookup error: */Gcf2_*.so: undefined symbol: kkiterr_last

Which I'm very confused by :x

And the tests which do pass don't seem much faster (if at all).
---
 lib/PublicInbox/Config.pm      | 35 +++++++++++++++-------
 lib/PublicInbox/Gcf2.pm        |  8 +++++
 lib/PublicInbox/gcf2_libgit2.h | 55 ++++++++++++++++++++++++++++++++--
 t/gcf2.t                       | 37 +++++++++++++++++++++--
 4 files changed, 119 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 6bebf790..244dcd1d 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -16,6 +16,7 @@ use PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd run_qx);
 our $LD_PRELOAD = $ENV{LD_PRELOAD}; # only valid at startup
 our $DEDUPE; # set to {} to dedupe or clear cache
+use autodie qw(open seek);
 
 sub _array ($) { ref($_[0]) eq 'ARRAY' ? $_[0] : [ $_[0] ] }
 
@@ -182,17 +183,29 @@ sub git_config_dump {
 	$file = undef if !-e $file;
 	# XXX should we set {-f} if !-e $file?
 	return bless {}, $class if (!@opt_c && !defined($file));
-	my %env;
-	my $opt = { 2 => $lei->{2} // 2 };
-	if (@opt_c) {
-		unshift(@opt_c, '-c', "include.path=$file") if defined($file);
-		tmp_cmd_opt(\%env, $opt);
-	}
-	my @cmd = ('git', @opt_c, qw(config -z -l --includes));
-	push(@cmd, '-f', $file) if !@opt_c && defined($file);
-	my $fh = popen_rd(\@cmd, \%env, $opt);
-	my $rv = config_fh_parse($fh, "\0", "\n");
-	$fh->close or die "@cmd failed: \$?=$?\n";
+	my $rv;
+	if (eval { require PublicInbox::Gcf2 }) {
+		open my $fh, '+>', undef;
+		PublicInbox::Lg2Cfg->new($file)->lz($fh) if defined $file;
+		for (@{$lei->{opt}->{c} // []}) {
+			print $fh join("\n", split /=/, $_, 2), "\0";
+		}
+		$fh->flush or die "flush: $!";
+		seek($fh, 0, 0);
+		$rv = config_fh_parse($fh, "\0", "\n");
+	} else {
+		my %env;
+		my $opt = { 2 => $lei->{2} // 2 };
+		if (@opt_c) {
+			unshift(@opt_c, '-c',"include.path=$file") if defined($file);
+			tmp_cmd_opt(\%env, $opt);
+		}
+		my @cmd = ('git', @opt_c, qw(config -z -l --includes));
+		push(@cmd, '-f', $file) if !@opt_c && defined($file);
+		my $fh = popen_rd(\@cmd, \%env, $opt);
+		$rv = config_fh_parse($fh, "\0", "\n");
+		$fh->close or die "@cmd failed: \$?=$?\n";
+	}
 	$rv->{-opt_c} = \@opt_c if @opt_c; # for ->urlmatch
 	$rv->{-f} = $file;
 	bless $rv, $class;
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
index 78392990..b96011e8 100644
--- a/lib/PublicInbox/Gcf2.pm
+++ b/lib/PublicInbox/Gcf2.pm
@@ -131,4 +131,12 @@ sub loop (;$) {
 	}
 }
 
+package PublicInbox::Lg2Cfg;
+use v5.12;
+
+no warnings 'once';
+*new = \&PublicInbox::Gcf2::cfg_new;
+*DESTROY = \&PublicInbox::Gcf2::cfg_DESTROY;
+*lz = \&PublicInbox::Gcf2::cfg_lz;
+
 1;
diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h
index e1f0ef39..33bc0542 100644
--- a/lib/PublicInbox/gcf2_libgit2.h
+++ b/lib/PublicInbox/gcf2_libgit2.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+ * Copyright (C) all contributors <meta@public-inbox.org>
  * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
  *
  * libgit2 for Inline::C
@@ -14,7 +14,7 @@
 static void croak_if_err(int rc, const char *msg)
 {
 	if (rc != GIT_OK) {
-		const git_error *e = giterr_last();
+		const git_error *e = kkiterr_last();
 
 		croak("%d %s (%s)", rc, msg, e ? e->message : "unknown");
 	}
@@ -140,3 +140,54 @@ int cat_oid(SV *self, int fd, SV *oidsv)
 
 	return rc == GIT_OK;
 }
+
+SV *cfg_new(SV *ignore, const char *f)
+{
+	SV *ref, *self;
+	git_config *cfg;
+	int rc = git_config_open_ondisk(&cfg, f);
+
+	croak_if_err(rc, "git_config_open_ondisk");
+
+	ref = newSViv((IV)cfg);
+	self = newRV_noinc(ref);
+	sv_bless(self, gv_stashpv("PublicInbox::Lg2Cfg", GV_ADD));
+	SvREADONLY_on(ref);
+
+	return self;
+}
+
+static git_config *cfg_ptr(SV *self)
+{
+	return (git_config *)SvIV(SvRV(self));
+}
+
+void cfg_DESTROY(SV *self)
+{
+	git_config_free(cfg_ptr(self));
+}
+
+/* git_config_foreach cb */
+static int cfg_each_i(const git_config_entry *ent, void *ptr)
+{
+	PerlIO *io = ptr;
+
+	PerlIO_write(io, ent->name, strlen(ent->name));
+	if (ent->value) {
+		PerlIO_write(io, "\n", 1);
+		PerlIO_write(io, ent->value, strlen(ent->value));
+	}
+	PerlIO_write(io, "\0", 1);
+
+	return 0;
+}
+
+/* `git config -lz --includes' */
+void cfg_lz(SV *self, PerlIO *io)
+{
+	int rc;
+	git_config *cfg = cfg_ptr(self);
+
+	rc = git_config_foreach(cfg, cfg_each_i, io);
+	croak_if_err(rc, "git_config_foreach");
+}
diff --git a/t/gcf2.t b/t/gcf2.t
index 33f3bbca..ad43832a 100644
--- a/t/gcf2.t
+++ b/t/gcf2.t
@@ -1,9 +1,9 @@
 #!perl -w
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
+use v5.12;
+use autodie;
 use PublicInbox::TestCommon;
-use Test::More;
 use Fcntl qw(:seek);
 use IO::Handle ();
 use POSIX qw(_exit);
@@ -14,6 +14,37 @@ use PublicInbox::Syscall qw($F_SETPIPE_SZ);
 use PublicInbox::Import;
 my ($tmpdir, $for_destroy) = tmpdir();
 
+{
+	open my $fh, '>', "$tmpdir/inc";
+	print $fh "[a]\nb = c\n";
+	close $fh;
+	open $fh, '>', "$tmpdir/top";
+	print $fh "[include]\npath = $tmpdir/inc\n";
+	close $fh;
+	open $fh, '>', \(my $buf);
+	my $cfg = PublicInbox::Lg2Cfg->new("$tmpdir/top");
+	$cfg->lz($fh);
+	close $fh;
+	my @lg2 = split /^/sm, $buf;
+	my @git = xqx([qw(git config --includes -lz -f), "$tmpdir/top"]);
+	is_xdeeply(\@lg2, \@git, 'git(1) and our dumper matches');
+	my $nr = $ENV{TEST_CFG_LEAK_NR} // 0;
+	for (0..$nr) {
+		$cfg = PublicInbox::Lg2Cfg->new("$tmpdir/top");
+		open $fh, '>', '/dev/null';
+		$cfg->lz($fh);
+	}
+	open $fh, '+>>', "$tmpdir/top";
+	print $fh "\0include";
+	close $fh;
+	open $fh, '>', undef;
+	$cfg = PublicInbox::Lg2Cfg->new("$tmpdir/top"); # WTF symbol lookup error
+	warn "cfg=$cfg";
+	$cfg->lz($fh);
+	$fh->flush;
+	diag("size=".(-s $fh));
+}
+
 my $gcf2 = PublicInbox::Gcf2::new();
 is(ref($gcf2), 'PublicInbox::Gcf2', '::new works');
 my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3';

                 reply	other threads:[~2024-01-11 22:36 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240111223608.M380280@dcvr \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).