;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2020 Katherine Cox-Buday
;;; Copyright © 2020 Helio Machado <0x2b3bfa0+guix@googlemail.com>
;;; Copyright © 2021 François Joulaud
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

;;; (guix import go) aims to make it easier to create Guix package
;;; declarations for Go modules.
;;;
;;; Modules in Go are a "collection of related Go packages" which are
;;; "the unit of source code interchange and versioning".
;;; Modules are generally hosted in a repository.
;;;
;;; At this point it should correctly handle modules which
;;; have only Go dependencies and are accessible from proxy.golang.org
;;; (or the configured GOPROXY).
;;;
;;; We want it to work more or less this way:
;;; - get the latest version of the module from GOPROXY
;;; - infer the VCS root repo from which we will check out the source by
;;;   + recognising known patterns (like github.com)
;;;   + or (TODO) recognising a .vcs suffix
;;;   + or parsing the meta tag in the HTML served at the URL
;;;   + or (TODO) if nothing else works, by using the zip file served by GOPROXY
;;; - get go.mod from GOPROXY (which is able to synthesize one if needed)
;;; - extract the list of dependencies from this go.mod
;;;
;;; We translate Go module paths to a Guix package name under the
;;; assumption that there will be no collision.
;;; TODO list
;;; - get correct hash in vcs->origin
;;; - print partial result during recursive imports (need to catch
;;;   exceptions)
;;; - infer repo from module path with VCS qualifier
;;;   (e.g. site.example/my/path/to/repo.git/and/subdir/module)
;;; - don't print fetch messages to stdout
;;; - pre-fill synopsis, description and license

(define-module (guix import go)
  #:use-module (ice-9 match)
  #:use-module (ice-9 rdelim)
  #:use-module (ice-9 receive)
  #:use-module (ice-9 regex)
  #:use-module (guix build-system go)
  #:use-module (htmlprag)
  #:use-module (sxml xpath)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-11)
  #:use-module (json)
  #:use-module ((guix download) #:prefix download:)
  #:use-module (guix git)
  #:use-module (guix import utils)
  #:use-module (guix import json)
  #:use-module (guix packages)
  #:use-module (guix upstream)
  #:use-module (guix utils)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix base16)
  #:use-module (guix base32)
  #:use-module (guix memoization)
  #:use-module ((guix build download) #:prefix build-download:)
  #:use-module (web uri)

  #:export (go-module->guix-package
            go-module-recursive-import
            infer-module-root-repo))

(define (go-path-escape path)
  "Escape a module path by replacing every uppercase letter with an
exclamation mark followed with its lowercase equivalent, as per the module
Escaped Paths specification.
https://godoc.org/golang.org/x/mod/module#hdr-Escaped_Paths"
  (define (escape occurrence)
    (string-append "!" (string-downcase (match:substring occurrence))))
  (regexp-substitute/global #f "[A-Z]" path 'pre escape 'post))

(define (go-module-latest-version goproxy-url module-path)
  "Fetches the version number of the latest version for MODULE-PATH from the
given GOPROXY-URL server."
  (assoc-ref (json-fetch (format #f "~a/~a/@latest" goproxy-url
                                 (go-path-escape module-path)))
             "Version"))

;; Memoized variant, so that a given module is only queried once.
(define go-module-latest-version*
  (memoize go-module-latest-version))

(define (fetch-go.mod goproxy-url module-path version file)
  "Fetches go.mod from the given GOPROXY-URL server for the given MODULE-PATH
and VERSION, and stores it in FILE."
  (let ((url (format #f "~a/~a/@v/~a.mod" goproxy-url
                     (go-path-escape module-path)
                     (go-path-escape version))))
    ;; Redirect the download messages to the error port so that they do not
    ;; get mixed with whatever is written to the output port.
    (parameterize ((current-output-port (current-error-port)))
      (build-download:url-fetch url file
                                #:print-build-trace? #f))))

(define (parse-go.mod go.mod-path)
  "Parse the go.mod file named GO.MOD-PATH and return its requirements, as
computed by PARSE-GO.MOD-PORT.  The file is closed once parsing is done."
  (call-with-input-file go.mod-path parse-go.mod-port))

(define (parse-go.mod-port go.mod-port)
  "Read the contents of a go.mod file from GO.MOD-PORT and return an
association list of (MODULE-PATH . VERSION) pairs for its requirements, with
the modules named on the left-hand side of 'replace' directives removed."
  ;; We parse only a subset of https://golang.org/ref/mod#go-mod-file-grammar
  ;; which we think necessary for our use case.

  (define (comment-or-blank? line)
    ;; Return true when LINE holds nothing but whitespace or a '//' comment.
    (let ((trimmed (string-trim line char-set:blank)))
      (or (string-null? trimmed)
          (string-prefix? "//" trimmed))))

  (define (toplevel results)
    "Main parser, RESULTS is a pair of alist serving as accumulator for
all encountered requirements and replacements."
    (let ((line (read-line)))
      (cond
       ((eof-object? line)
        ;; parsing ended, give back the result
        results)
       ((string=? line "require (")
        ;; a require block begins, delegate parsing to IN-REQUIRE
        (in-require results))
       ((string=? line "replace (")
        ;; a replace block begins, delegate parsing to IN-REPLACE
        (in-replace results))
       ((string-prefix? "require " line)
        ;; a require directive by itself
        (let* ((stripped-line (string-drop line 8))
               (new-results (require-directive results stripped-line)))
          (toplevel new-results)))
       ((string-prefix? "replace " line)
        ;; a replace directive by itself
        (let* ((stripped-line (string-drop line 8))
               (new-results (replace-directive results stripped-line)))
          (toplevel new-results)))
       (else
        ;; unrecognised line, ignore silently
        (toplevel results)))))

  (define (in-require results)
    (let ((line (read-line)))
      (cond
       ((eof-object? line)
        ;; this should never happen here but we ignore silently
        results)
       ((string=? line ")")
        ;; end of block, coming back to toplevel
        (toplevel results))
       (else
        (in-require (require-directive results line))))))

  (define (in-replace results)
    (let ((line (read-line)))
      (cond
       ((eof-object? line)
        ;; this should never happen here but we ignore silently
        results)
       ((string=? line ")")
        ;; end of block, coming back to toplevel
        (toplevel results))
       (else
        (in-replace (replace-directive results line))))))

  (define (replace-directive results line)
    "Extract replaced modules and new requirements from replace directive
in LINE and add to RESULTS.  RESULTS is returned unchanged when LINE is
blank, a comment, or does not look like a replace specification."
    ;; ReplaceSpec = ModulePath [ Version ] "=>" FilePath newline
    ;;             | ModulePath [ Version ] "=>" ModulePath Version newline .
    (let* ((requirements (car results))
           (replaced (cdr results))
           (re (string-concatenate
                '("([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?"
                  "[[:blank:]]+" "=>" "[[:blank:]]+"
                  "([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?")))
           (m (and (not (comment-or-blank? line))
                   (string-match re line))))
      (if (not m)
          ;; Nothing usable on this line; 'match:substring' would fail on #f.
          results
          (let* ((module-path (match:substring m 1))
                 (version (match:substring m 3))
                 (new-module-path (match:substring m 4))
                 (new-version (match:substring m 6))
                 (new-replaced (acons module-path version replaced))
                 ;; A replacement starting with ./ or ../ is a local file
                 ;; path, which is not something we can depend on.
                 (new-requirements
                  (if (string-match "^\\.?\\./" new-module-path)
                      requirements
                      (acons new-module-path new-version requirements))))
            (cons new-requirements new-replaced)))))

  (define (require-directive results line)
    "Extract requirement from LINE and add it to RESULTS.  RESULTS is
returned unchanged when LINE is blank, a comment, or does not look like a
requirement."
    (let* ((requirements (car results))
           (replaced (cdr results))
           ;; A line in a require directive is composed of a module path and
           ;; a version separated by whitespace and an optional '//' comment
           ;; at the end.
           (re (string-concatenate
                '("^[[:blank:]]*"
                  "([^[:blank:]]+)[[:blank:]]+([^[:blank:]]+)"
                  "([[:blank:]]+//.*)?")))
           (m (and (not (comment-or-blank? line))
                   (string-match re line))))
      (if (not m)
          ;; Nothing usable on this line; 'match:substring' would fail on #f.
          results
          (let ((module-path
                 ;; we saw double-quoted string in the wild without escape
                 ;; sequences so we just trim the quotes
                 (string-trim-both (match:substring m 1) #\"))
                (version (match:substring m 2)))
            (cons (acons module-path version requirements) replaced)))))

  (with-input-from-port go.mod-port
    (lambda ()
      (let* ((results (toplevel '(() . ())))
             (requirements (car results))
             (replaced (cdr results)))
        ;; At last we remove replaced modules from the requirements list
        (fold
         (lambda (replacedelem requirements)
           (alist-delete! (car replacedelem) requirements))
         requirements
         replaced)))))

(define (infer-module-root-repo module-path)
  "Go modules can be defined at any level of a repository's tree, but querying
for the meta tag usually can only be done at the webpage at the root of the
repository.  Therefore, it is sometimes necessary to try and derive a module's
root path from its path.  For a set of well-known forges, the pattern of what
consists of a module's root page is known before hand.  MODULE-PATH is
returned as is when it does not match any of the known patterns."
  ;; See the following URL for the official Go equivalent:
  ;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087
  ;;
  ;; TODO: handle module path with VCS qualifier as described in
  ;; https://golang.org/ref/mod#vcs-find and
  ;; https://golang.org/cmd/go/#hdr-Remote_import_paths
  (define-record-type <vcs>
    (make-vcs url-prefix root-regex type)
    vcs?
    (url-prefix vcs-url-prefix)
    (root-regex vcs-root-regex)
    (type vcs-type))
  (let* ((known-vcs
          (list
           (make-vcs
            "github.com"
            "^(github\\.com/[A-Za-z0-9_.\\-]+/[A-Za-z0-9_.\\-]+)(/[A-Za-z0-9_.\\-]+)*$"
            'git)
           (make-vcs
            "bitbucket.org"
            "^(bitbucket\\.org/([A-Za-z0-9_.\\-]+/[A-Za-z0-9_.\\-]+))(/[A-Za-z0-9_.\\-]+)*$"
            'unknown)
           (make-vcs
            "hub.jazz.net/git/"
            "^(hub\\.jazz\\.net/git/[a-z0-9]+/[A-Za-z0-9_.\\-]+)(/[A-Za-z0-9_.\\-]+)*$"
            'git)
           (make-vcs
            "git.apache.org"
            "^(git\\.apache\\.org/[a-z0-9_.\\-]+\\.git)(/[A-Za-z0-9_.\\-]+)*$"
            'git)
           (make-vcs
            "git.openstack.org"
            "^(git\\.openstack\\.org/[A-Za-z0-9_.\\-]+/[A-Za-z0-9_.\\-]+)(\\.git)?(/[A-Za-z0-9_.\\-]+)*$"
            'git)))
         (vcs (find (lambda (vcs)
                      (string-prefix? (vcs-url-prefix vcs) module-path))
                    known-vcs))
         ;; The prefix test is only a cheap filter: the path may still fail
         ;; to match the full pattern (e.g. "github.com/foo"), in which case
         ;; ROOT-MATCH is #f.
         (root-match (and vcs
                          (string-match (vcs-root-regex vcs) module-path))))
    (if root-match
        (match:substring root-match 1)
        module-path)))

(define (go-module->guix-package-name module-path)
  "Converts a module's path to the canonical Guix format for Go packages."
  (string-downcase (string-append "go-" (string-replace-substring
                                         (string-replace-substring
                                          module-path
                                          "." "-")
                                         "/" "-"))))

(define-record-type <module-meta>
  (make-module-meta import-prefix vcs repo-root)
  module-meta?
  (import-prefix module-meta-import-prefix)
  ;; VCS field is a symbol
  (vcs module-meta-vcs)
  (repo-root module-meta-repo-root))

(define (fetch-module-meta-data module-path)
  "Fetches module meta-data from a module's landing page.  This is necessary
because goproxy servers don't currently provide all the information needed to
build a package.  Return a <module-meta> record; raise an error when the page
has no usable go-import meta tag."
  ;; <meta name="go-import" content="import-prefix vcs repo-root">
  (define (meta-go-import->module-meta text)
    "Takes the content of the go-import meta tag as TEXT and gives back
a MODULE-META record"
    ;; TEXT holds three whitespace-separated components.
    (match (string-tokenize text
                            (char-set-complement char-set:whitespace))
      ((import-prefix vcs repo-root . _)
       (make-module-meta import-prefix (string->symbol vcs) repo-root))
      (_
       (error "malformed go-import meta tag content" module-path text))))

  (define (html->meta-go-import port)
    "Read PORT with HTML content.  Find the go-import meta tag and gives
back its content as a string."
    (let* ((parsedhtml (html->sxml port))
           (extract-content (node-join
                             (select-kids (node-typeof? 'html))
                             (select-kids (node-typeof? 'head))
                             (select-kids (node-typeof? 'meta))
                             (select-kids (node-typeof? '@))
                             (node-self
                              (node-join
                               (select-kids (node-typeof? 'name))
                               (select-kids (node-equal? "go-import"))))
                             (select-kids (node-typeof? 'content))
                             (select-kids (lambda (_) #t))))
           (contents (extract-content parsedhtml)))
      (if (null? contents)
          (error "no go-import meta tag found for module" module-path)
          (car contents))))

  (let ((port (build-download:http-fetch
               (string->uri (format #f "https://~a?go-get=1" module-path)))))
    ;; Make sure PORT gets closed even when parsing fails.
    (dynamic-wind
      (const #t)
      (lambda ()
        (meta-go-import->module-meta (html->meta-go-import port)))
      (lambda ()
        (close-port port)))))

(define (module-meta-data-repo-url meta-data goproxy-url)
  "Return the URL where the fetcher which will be used can download the source
control: GOPROXY-URL when the VCS of META-DATA is 'fossil' or 'mod', and the
repository root recorded in META-DATA otherwise."
  (if (memq (module-meta-vcs meta-data) '(fossil mod))
      goproxy-url
      (module-meta-repo-root meta-data)))

(define (vcs->origin vcs-type vcs-repo-url version file)
  "Generate the `origin' block of a package depending on what type of source
control system is being used.  VCS-TYPE is one of the symbols 'git', 'hg' or
'svn'; an exception is raised for any other value."
  ;; Note: the quoted occurrences of 'name' and 'version' below are emitted
  ;; verbatim in the generated code.
  (case vcs-type
    ((git)
     `(origin
        (method git-fetch)
        (uri (git-reference
              (url ,vcs-repo-url)
              (commit (go-version->git-ref version))))
        (file-name (git-file-name name version))
        (sha256
         (base32
          ;; FIXME: get hash for git repo checkout
          "0000000000000000000000000000000000000000000000000000"))))
    ((hg)
     `(origin
        (method hg-fetch)
        (uri (hg-reference
              (url ,vcs-repo-url)
              (changeset ,version)))
        (file-name (format #f "~a-~a-checkout" name version))
        (sha256
         (base32
          ;; FIXME: get hash for hg repo checkout
          "0000000000000000000000000000000000000000000000000000"))))
    ((svn)
     `(origin
        (method svn-fetch)
        (uri (svn-reference
              (url ,vcs-repo-url)
              (revision (string->number version))
              (recursive? #f)))
        (file-name (format #f "~a-~a-checkout" name version))
        (sha256
         ;; FIXME: this is the hash of FILE, not that of the svn checkout
         (base32
          ,(guix-hash-url file)))))
    (else
     (raise-exception (format #f "unsupported vcs type: ~a" vcs-type)))))

(define* (go-module->guix-package module-path #:key
                                  (goproxy-url "https://proxy.golang.org"))
  "Return two values for the latest version of the Go module MODULE-PATH as
advertised by GOPROXY-URL: the S-expression of a Guix package definition, and
the list of module paths it depends on according to its go.mod file."
  (call-with-temporary-output-file
   (lambda (temp port)
     (let* ((latest-version (go-module-latest-version* goproxy-url module-path))
            ;; Bound only for its side effect: go.mod is downloaded to TEMP,
            ;; which is then parsed below.
            (go.mod-path (fetch-go.mod goproxy-url module-path latest-version
                                       temp))
            (dependencies (map car (parse-go.mod temp)))
            (guix-name (go-module->guix-package-name module-path))
            (root-module-path (infer-module-root-repo module-path))
            ;; VCS type and URL are not included in goproxy information.  For
            ;; this we need to fetch it from the official module page.
            (meta-data (fetch-module-meta-data root-module-path))
            (vcs-type (module-meta-vcs meta-data))
            (vcs-repo-url (module-meta-data-repo-url meta-data goproxy-url)))
       (values
        `(package
           (name ,guix-name)
           ;; Elide the "v" prefix Go uses
           (version ,(string-trim latest-version #\v))
           (source
            ,(vcs->origin vcs-type vcs-repo-url latest-version temp))
           (build-system go-build-system)
           (arguments
            '(#:import-path ,root-module-path))
           ,@(maybe-inputs (map go-module->guix-package-name dependencies))
           ;; TODO(katco): It would be nice to make an effort to fetch this
           ;; from known forges, e.g. GitHub
           (home-page ,(format #f "https://~a" root-module-path))
           (synopsis "A Go package")
           (description ,(format #f "~a is a Go package." guix-name))
           (license #f))
        dependencies)))))

;; Memoized variant, so that a module reached through several dependency
;; paths is only imported once.
(define go-module->guix-package* (memoize go-module->guix-package))

(define* (go-module-recursive-import package-name
                                     #:key (goproxy-url "https://proxy.golang.org"))
  "Import the Go module PACKAGE-NAME from GOPROXY-URL by handing
GO-MODULE->GUIX-PACKAGE* to RECURSIVE-IMPORT."
  (recursive-import
   package-name
   #:repo->guix-package (lambda* (name . _)
                          (go-module->guix-package*
                           name
                           #:goproxy-url goproxy-url))
   #:guix-name go-module->guix-package-name))