Thanks for the reply Liliana,

On Wed, 2 Mar 2022 at 21:06, Liliana Marie Prikler <liliana.prikler@gmail.com> wrote:

>> In Guix this means that the first time I build a PR it fails, and I
>> have to do something like "guix build  foo | guix build foo" which is
>> at best a clumsy hack, but it works!
>Note that you could alternatively just use the requesting repo URL in
>the git-reference.  Ah, but that would be too boring, wouldn't it?

Ha ha - indeed far too boring :-) If you're suggesting I use an HTTP(S)/URL to access the repository, then annoingly in my specific case the repo is password protected - this is why I turned to SSH in the first place so I could use keys to authenticate.  Or I'm missing the point?

 
>> I was wondering if anyway could confirm this and/or give me a pointer
>> of where the compliation occurs (where the record in the source is
>> handled and compiled into a git clone) - even if my approach is
>>(quite possibly) unviable, I'd like to understand why!
>The source field of a package is not thunked, so it is compiled in the
>package definition itself, rather than handed off.  Note that every
>hack of doing something fancyful usually invokes at least a delay form,
>see e.g. the computed-origin-method used by linux-libre.  Hope that
>helps.

 
Yes - thanks for the hints, I got there in the end.  Just to confirm that although perhaps not the most elegant workaround, I eventually got the idea to work.

My original effort lacked the exports for the below, and a few other missing items private to (guix git).
update-cached-checkout-x-ref
latest-repository-commit-x-ref

Having dug a bit deeper into how the hash table for %gexp-compilers is setup as a mapping of to record type descriptior to gexp-compiler, I've got a better understanding now of how package compilation works so it was a worthy if experiment :-)

I've included what I think is a true minimal implementation, borrowing from (guix git) where possible to minimize duplication - in case anyone else ever stumbles into the same issue (although it's pretty niche, I admit!).

Now to go and fix it properly in libgit2....


;; -*- mode: scheme; eval: (guix-devel-mode 1); geiser-scheme-implementation: guile -*-


;; This is an alternative to guix-checkout record type, which always does a fetch
;; even after the initial clone.
;; This is required because of a bug in libgit2 which means that fresh clones
;; don't pull down any additional refs from the git config:
;; https://github.com/libgit2/libgit2/issues/6183

;; Once this is fixed and Guix builds against the fixed version this should
;; be removed.

(define-module (my-tools git)
  #:use-module (git) ;; openable-repository
  #:use-module (guix git) ;; avoid duplication - use all public members from here
  #:use-module (guix cache) ;; maybe-remove-expired-cache-entries
  #:use-module (guix store) ;; add-to-store
  #:use-module (guix records)
  #:use-module (guix gexp)
  #:use-module (ice-9 match)
  #:use-module (ice-9 ftw) ;; scandir
  #:use-module (srfi srfi-1) ;; filter-map
  #:use-module (srfi srfi-11) ;; let*-values
  #:export (
            update-cached-checkout-x-ref
            latest-repository-commit-x-ref

            git-checkout-x-refs
            git-checkout-x-refs?
            git-checkout-x-refs-url
            git-checkout-x-refs-branch
            git-checkout-x-refs-commit
            git-checkout-x-refs-recursive?))


;; Avoid duplicating required private functions
(define make-default-fetch-options (@@ (guix git) make-default-fetch-options))
(define resolve-reference (@@ (guix git) resolve-reference))
(define clone/swh-fallback (@@ (guix git) clone/swh-fallback))
(define switch-to-ref (@@ (guix git) switch-to-ref))
(define update-submodules (@@ (guix git) update-submodules))
(define reference-available? (@@ (guix git) reference-available?))
(define cached-checkout-expiration (@@ (guix git) cached-checkout-expiration))
(define %checkout-cache-cleanup-period (@@ (guix git) %checkout-cache-cleanup-period))
(define delete-checkout (@@ (guix git) delete-checkout))
(define print-git-error (@@ (guix git) print-git-error))


(define* (update-cached-checkout-x-ref url
                                       #:key
                                       (ref '())
                                       recursive?
                                       (check-out? #t)
                                       starting-commit
                                       (log-port (%make-void-port "w"))
                                       (cache-directory
                                        (url-cache-directory
                                         url (%repository-cache-directory)
                                         #:recursive? recursive?)))
  "Update the cached checkout of URL to REF in CACHE-DIRECTORY.  Return three
values: the cache directory name, and the SHA1 commit (a string) corresponding
to REF, and the relation of the new commit relative to STARTING-COMMIT (if
provided) as returned by 'commit-relation'.

REF is pair whose key is [branch | commit | tag | tag-or-commit ] and value
the associated data: [<branch name> | <sha1> | <tag name> | <string>].
If REF is the empty list, the remote HEAD is used.

When RECURSIVE? is true, check out submodules as well, if any.

When CHECK-OUT? is true, reset the cached working tree to REF; otherwise leave
it unchanged."
  (define (cache-entries directory)
    (filter-map (match-lambda
                  ((or "." "..")
                   #f)
                  (file
                   (string-append directory "/" file)))
                (or (scandir directory) '())))

  (define canonical-ref
    ;; We used to require callers to specify "origin/" for each branch, which
    ;; made little sense since the cache should be transparent to them.  So
    ;; here we append "origin/" if it's missing and otherwise keep it.
    (match ref
      (() '(symref . "refs/remotes/origin/HEAD"))
      (('branch . branch)
       `(branch . ,(if (string-prefix? "origin/" branch)
                       branch
                       (string-append "origin/" branch))))
      (_ ref)))

  ((@@ (guix git) with-libgit2)
   (let* ((cache-exists? (openable-repository? cache-directory))
          (repository    (if cache-exists?
                             (repository-open cache-directory)
                             (clone/swh-fallback url ref cache-directory))))
     ;; Always fetch remote, even if it has been cloned just before.
     (when ;; <---- THIS IS WHERE THE CHANGE IS FROM DEFAULT BEHAVIOR
         (not (reference-available? repository ref))
       (remote-fetch (remote-lookup repository "origin")
                     #:fetch-options (make-default-fetch-options)))
     (when recursive?
       (update-submodules repository #:log-port log-port
                          #:fetch-options (make-default-fetch-options)))

     ;; Note: call 'commit-relation' from here because it's more efficient
     ;; than letting users re-open the checkout later on.
     (let* ((oid      (if check-out?
                          (switch-to-ref repository canonical-ref)
                          (object-id
                           (resolve-reference repository canonical-ref))))
            (new      (and starting-commit
                           (commit-lookup repository oid)))
            (old      (and starting-commit
                           (false-if-git-not-found
                            (commit-lookup repository
                                           (string->oid starting-commit)))))
            (relation (and starting-commit
                           (if old
                               (commit-relation old new)
                               'unrelated))))

       ;; Reclaim file descriptors and memory mappings associated with
       ;; REPOSITORY as soon as possible.
       (repository-close! repository)

       ;; Update CACHE-DIRECTORY's mtime to so the cache logic sees it.
       (match (gettimeofday)
         ((seconds . microseconds)
          (let ((nanoseconds (* 1000 microseconds)))
            (utime cache-directory
                   seconds seconds
                   nanoseconds nanoseconds))))

       ;; When CACHE-DIRECTORY is a sub-directory of the default cache
       ;; directory, remove expired checkouts that are next to it.
       (let ((parent (dirname cache-directory)))
         (when (string=? parent (%repository-cache-directory))
           (maybe-remove-expired-cache-entries parent cache-entries
                                               #:entry-expiration
                                               cached-checkout-expiration
                                               #:delete-entry delete-checkout
                                               #:cleanup-period
                                               %checkout-cache-cleanup-period)))

       (values cache-directory (oid->string oid) relation)))))



(define* (latest-repository-commit-x-ref store url
                                         #:key
                                         recursive?
                                         (log-port (%make-void-port "w"))
                                         (cache-directory
                                          (%repository-cache-directory))
                                         (ref '()))
  "Return two values: the content of the git repository at URL copied into a
store directory and the sha1 of the top level commit in this directory.  The
reference to be checkout, once the repository is fetched, is specified by REF.
REF is pair whose key is [branch | commit | tag] and value the associated
data, respectively [<branch name> | <sha1> | <tag name>].  If REF is the empty
list, the remote HEAD is used.

When RECURSIVE? is true, check out submodules as well, if any.

Git repositories are kept in the cache directory specified by
%repository-cache-directory parameter.

Log progress and checkout info to LOG-PORT."
  (define (dot-git? file stat)
    (and (string=? (basename file) ".git")
         (or (eq? 'directory (stat:type stat))

             ;; Submodule checkouts end up with a '.git' regular file that
             ;; contains metadata about where their actual '.git' directory
             ;; lives.
             (and recursive?
                  (eq? 'regular (stat:type stat))))))

  (format log-port "updating checkout of '~a'...~%" url)
  (let*-values
      (((checkout commit _)
        (update-cached-checkout-x-ref url
                                      #:recursive? recursive?
                                      #:ref ref
                                      #:cache-directory
                                      (url-cache-directory url cache-directory
                                                           #:recursive?
                                                           recursive?)
                                      #:log-port log-port))
       ((name)
        (url+commit->name url commit)))
    (format log-port "retrieved commit ~a~%" commit)
    (values (add-to-store store name #t "sha256" checkout
                          #:select? (negate dot-git?))
            commit)))


(set-exception-printer! 'git-error print-git-error)


;;;
;;; Checkouts.
;;;



;; Representation of the "latest" checkout of a branch or a specific commit.
(define-record-type* <git-checkout-x-refs>
  git-checkout-x-refs make-git-checkout-x-refs
  git-checkout-x-refs?
  (url     git-checkout-x-refs-url)
  (branch  git-checkout-x-refs-branch (default #f))
  (commit  git-checkout-x-refs-commit (default #f))      ;#f | tag | commit
  (recursive? git-checkout-x-refs-recursive? (default #f)))



(define* (latest-repository-commit* url #:key ref recursive? log-port)
  ;; Monadic variant of 'latest-repository-commit'.
  (lambda (store)
    ;; The caller--e.g., (guix scripts build)--may not handle 'git-error' so
    ;; translate it into '&message' conditions that we know will be properly
    ;; handled.
    (catch 'git-error
      (lambda ()
        (values (latest-repository-commit-x-ref store url
                                                #:ref ref
                                                #:recursive? recursive?
                                                #:log-port log-port)
                store))
      (lambda (key error . _)
        (raise (condition
                (&message
                 (message
                  (match ref
                    (('commit . commit)
                     (format #f (G_ "cannot fetch commit ~a from ~a: ~a")
                             commit url (git-error-message error)))
                    (('branch . branch)
                     (format #f (G_ "cannot fetch branch '~a' from ~a: ~a")
                             branch url (git-error-message error)))
                    (_
                     (format #f (G_ "Git failure while fetching ~a: ~a")
                             url (git-error-message error))))))))))))



(define-gexp-compiler (git-checkout-x-refs-compiler (checkout <git-checkout-x-refs>)
                                                    system target)
  ;; "Compile" CHECKOUT by updating the local checkout and adding it to the
  ;; store.
  (match checkout
    (($ <git-checkout-x-refs> url branch commit recursive?)
     (latest-repository-commit* url
                                #:ref (cond (commit
                                             `(tag-or-commit . ,commit))
                                            (branch
                                             `(branch . ,branch))
                                            (else '()))
                                #:recursive? recursive?
                                #:log-port (current-error-port)))))