From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 97C9A1F4B6 for ; Mon, 13 Mar 2023 12:00:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1678708825; bh=Ll6IkdUfwD4LH1mrgbNTo1ycNked/JmX7iWDswg8+Ec=; h=From:To:Subject:Date:In-Reply-To:References:From; b=l/y9p+gkS5OLLHPT/uFXfbxiYGgnrUTfcSBBzS7mMXh4K3HeJ7Mv1JyY01v3u0Jw8 /ZFNl1iuk9TkKCESyjUQxRbdGw1kpWZzR0oWRaf8DaVrZ3O2pPQ/GuP0eTR8qlM3RU RyPRuTcT4rc9lqccL7kG4lBH3m4AETM8z2zBktHw= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/5] lei_mirror: handle UTF-8 from manifest.js.gz properly Date: Mon, 13 Mar 2023 12:00:23 +0000 Message-Id: <20230313120024.1911925-5-e@80x24.org> In-Reply-To: <20230313120024.1911925-1-e@80x24.org> References: <20230313120024.1911925-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This should ensure we display the "git config gitweb.owner $OWNER" command invocation properly and also ensure we set the description properly without triggering wide character warnings. Also tested with a smallish iproute2 repo (/pub/scm/linux/kernel/git/toke/iproute2.git) using my mirror: public-inbox-clone --remote-manifest=pub/manifest.js.gz \ --include='*/toke/iproute2.git' --inbox-config=never \ https://80x24.org/lore $DST Anyways, I'm fairly certain this change and its tests are correct; but I still struggle to understand Perl's approach to Unicode and its interactions with various JSON implementations. 
Fixes: 0830817c132cb105 ("lei_mirror: show non-ASCII owner properly w/ --verbose") --- lib/PublicInbox/LeiMirror.pm | 6 +++--- t/clone-coderepo.t | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 3ec8170f..18932cf4 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -259,8 +259,7 @@ sub run_reap { sub start_cmd { my ($self, $cmd, $opt, $fini) = @_; do_reap($self); - utf8::decode(my $msg = "# @$cmd"); - $self->{lei}->qerr($msg); + $self->{lei}->qerr("# @$cmd"); return if $self->{dry_run}; $LIVE->{spawn($cmd, undef, $opt)} = [ \&reap_cmd, $self, $cmd, $fini ] } @@ -633,7 +632,7 @@ sub clone_v1 { } my $d = $self->{-ent} ? $self->{-ent}->{description} : undef; - $self->{'txt.description'} = $d if defined $d; + utf8::encode($self->{'txt.description'} = $d) if defined $d; (!defined($d) && !$end) and _get_txt_start($self, 'description', $fini); @@ -823,6 +822,7 @@ sub update_ent { $new = $self->{-ent}->{owner} // return; $cur = $self->{-local_manifest}->{$key}->{owner} // "\0"; return if $cur eq $new; + utf8::encode($new); # to octets my $cmd = [ qw(git config -f), "$dst/config", 'gitweb.owner', $new ]; start_cmd($self, $cmd, { 2 => $self->{lei}->{2} }); } diff --git a/t/clone-coderepo.t b/t/clone-coderepo.t index 1f33a6d7..3a5997c9 100644 --- a/t/clone-coderepo.t +++ b/t/clone-coderepo.t @@ -63,11 +63,13 @@ EOM my $env = { TEST_DOCROOT => "$tmpdir/src", PI_CONFIG => $pi_config }; $td = start_script($cmd, $env, { 3 => $tcp }); my $fp = sha1_hex(my $refs = xqx([@git, 'show-ref'])); + my $alice = "\x{100}lice"; $m = { '/a.git' => { fingerprint => $fp, modified => 1, - owner => 'Alice', + owner => $alice, + description => "${alice}'s repo", }, '/b.git' => { fingerprint => $fp, @@ -89,9 +91,11 @@ my $cmd = [qw(-clone --inbox-config=never --manifest= --project-list= --objstore= -p -q), $url, "$tmpdir/dst", '--exit-code']; ok(run_script($cmd), 'clone'); 
is(xqx([qw(git config gitweb.owner)], { GIT_DIR => "$tmpdir/dst/a.git" }), - "Alice\n", 'a.git gitweb.owner set'); + "\xc4\x80lice\n", 'a.git gitweb.owner set'); is(xqx([qw(git config gitweb.owner)], { GIT_DIR => "$tmpdir/dst/b.git" }), "Bob\n", 'b.git gitweb.owner set'); +my $desc = PublicInbox::Git::try_cat("$tmpdir/dst/a.git/description"); +is($desc, "\xc4\x80lice's repo\n", 'description set'); my $dst_pl = "$tmpdir/dst/projects.list"; my $dst_mf = "$tmpdir/dst/manifest.js.gz";