From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id ECD1E1F406 for ; Wed, 25 Oct 2023 06:33:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1698215636; bh=jcY2bQ3uMbnu0pGLt8FxtcIiw+9v7XYUt4xMGBu8X2c=; h=Date:From:To:Subject:References:In-Reply-To:From; b=c8A0kUWkzbZFOYLttTi7f/hV7rKvo7JPfALxO3OAE6YDpyMjhTwPfbgHxtCr3BJrJ 4MfynPl23Oyp6fdHQ4kf3xYBrdY3lkdgc5fHanegxlmZ7qOMXsiBB02lBP/vFbeYQV W2M044mrhjjqNgg7x75F52DImhAsayT1+NVe9zXg= Date: Wed, 25 Oct 2023 06:33:55 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 27/26] lei_mirror+fetch: don't slurp `git show-ref' output Message-ID: <20231025063355.M844764@dcvr> References: <20231025002949.3092193-1-e@80x24.org> <20231025002949.3092193-27-e@80x24.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20231025002949.3092193-27-e@80x24.org> List-Id: While uncommon, some git repos have hundreds of thousands of refs and slurping that output into memory can bloat the heap. Introduce a sha_all sub in PublicInbox::SHA to loop until EOF and rely on autodie for checking sysread errors. --- lib/PublicInbox/CodeSearchIdx.pm | 7 ++----- lib/PublicInbox/Fetch.pm | 4 ++-- lib/PublicInbox/Git.pm | 6 ++---- lib/PublicInbox/LeiMirror.pm | 14 +++++++------- lib/PublicInbox/SHA.pm | 11 ++++++++++- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index e31432b9..aeee37c0 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -45,7 +45,7 @@ use POSIX qw(WNOHANG SEEK_SET); use File::Path (); use File::Spec (); use List::Util qw(max); -use PublicInbox::SHA qw(sha256_hex); +use PublicInbox::SHA qw(sha256_hex sha_all); use PublicInbox::Search qw(xap_terms); use PublicInbox::SearchIdx qw(add_val); use PublicInbox::Config qw(glob2re rel2abs_collapsed); @@ -386,10 +386,7 @@ sub fp_fini { # run_git cb my (undef, $self, $git, $prep_repo) = @_; my $refs = $git->{-repo}->{refs} // die 'BUG: no {-repo}->{refs}'; sysseek($refs, 0, SEEK_SET); - my $buf; - my $dig = PublicInbox::SHA->new(256); - while (sysread($refs, $buf, 65536)) { $dig->add($buf) } - $git->{-repo}->{fp} = $dig->hexdigest; + $git->{-repo}->{fp} = sha_all(256, $refs)->hexdigest; } sub ct_start ($$$) { diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 6e9b1e94..e41dd448 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -10,6 +10,7 @@ use PublicInbox::Admin; use PublicInbox::LEI; use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; +use PublicInbox::SHA qw(sha_all); use File::Temp (); sub new { bless {}, __PACKAGE__ } @@ -92,9 +93,8 @@ sub do_manifest ($$$) { sub get_fingerprint2 { my ($git_dir) = @_; - require PublicInbox::SHA; my $rd = popen_rd([qw(git show-ref)], undef, { -C => $git_dir }); - PublicInbox::SHA::sha256(do { local $/; <$rd> }); + sha_all(256, $rd)->digest; # ignore show-ref errors } sub writable_dir ($) { diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 476dcf30..9c26d8bf 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -23,7 +23,7 @@ use PublicInbox::ProcessIONBF; use PublicInbox::Tmpfile; use IO::Poll qw(POLLIN); use Carp qw(croak carp); -use PublicInbox::SHA (); +use PublicInbox::SHA qw(sha_all); our %HEXLEN2SHA = (40 => 1, 64 => 256); our %OFMT2HEXLEN = (sha1 => 40, sha256 => 64); our @EXPORT_OK = qw(git_unquote git_quote %HEXLEN2SHA %OFMT2HEXLEN read_all); @@ -620,10 +620,8 @@ sub manifest_entry { $ent->{reference} = $buf; } } - my $dig = PublicInbox::SHA->new(1); - while (CORE::read($sr, $buf, 65536)) { $dig->add($buf) } + $ent->{fingerprint} = sha_all(1, $sr)->hexdigest; CORE::close $sr or return; # empty, uninitialized git repo - $ent->{fingerprint} = $dig->hexdigest; $ent->{modified} = modified(undef, $mod); chomp($buf = <$own> // ''); utf8::decode($buf); diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 47fb767b..43e59e6c 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -19,10 +19,10 @@ use PublicInbox::Inbox; use PublicInbox::Git qw(read_all); use PublicInbox::LeiCurl; use PublicInbox::OnDestroy; -use PublicInbox::SHA qw(sha256_hex sha1_hex); +use PublicInbox::SHA qw(sha256_hex sha_all); use POSIX qw(strftime); -use autodie qw(chdir chmod close open pipe readlink seek symlink sysopen - truncate unlink); +use autodie qw(chdir chmod close open pipe readlink + seek symlink sysopen sysseek truncate unlink); our $LIVE; # pid => callback our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]] @@ -533,10 +533,10 @@ sub fp_done { } return if !keep_going($self); my $fh = delete $self->{-show_ref} // die 'BUG: no show-ref output'; - seek($fh, SEEK_SET, 0); + sysseek($fh, SEEK_SET, 0); $self->{-ent} // die 'BUG: no -ent'; my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint'; - my $B = sha1_hex(read_all($fh)); + my $B = sha_all(1, $fh)->hexdigest; return $cb->($self, @arg) if $A ne $B; $self->{lei}->qerr("# $self->{-key} up-to-date"); } @@ -730,10 +730,10 @@ sub up_fp_done { my ($self) = @_; return if !keep_going($self); my $fh = delete $self->{-show_ref_up} // die 'BUG: no show-ref output'; - seek($fh, SEEK_SET, 0); + sysseek($fh, SEEK_SET, 0); $self->{-ent} // die 'BUG: no -ent'; my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint'; - my $B = sha1_hex(read_all($fh)); + my $B = sha_all(1, $fh)->hexdigest; return if $A eq $B; $self->{-ent}->{fingerprint} = $B; push @{$self->{chg}->{fp_mismatch}}, $self->{-key}; diff --git a/lib/PublicInbox/SHA.pm b/lib/PublicInbox/SHA.pm index 81f62618..3fa8530e 100644 --- a/lib/PublicInbox/SHA.pm +++ b/lib/PublicInbox/SHA.pm @@ -12,7 +12,8 @@ package PublicInbox::SHA; use v5.12; require Exporter; -our @EXPORT_OK = qw(sha1_hex sha256_hex sha256); +our @EXPORT_OK = qw(sha1_hex sha256_hex sha256 sha_all); +use autodie qw(sysread); our @ISA; BEGIN { @@ -55,4 +56,12 @@ EOM } } # /BEGIN + +sub sha_all ($$) { + my ($n, $fh) = @_; + my ($dig, $buf) = (PublicInbox::SHA->new($n)); + while (sysread($fh, $buf, 65536)) { $dig->add($buf) } + $dig +} + 1;