Upgrade the go.mod parser to handle the full go.mod spec, and to gracefully handle unexpected/malformed syntax. Restructure parser usage, making the parse tree available for other uses. guix/import/go.scm (parse-go.mod): Parse using (ice-9 peg) instead of regex matching for more robustness. Return a list of directives. (go.mod-directives): New procedure. (go.mod-requirements): New procedure. (go-module->guix-package): Use it. (%go.mod-require-directive-rx) (%go.mod-replace-directive-rx): Remove unused variable. tests/go.scm (testing-parse-mod): Adjust accordingly. (go.mod-requirements) (fixture-go-mod-unparseable) (fixture-go-mod-retract) (fixture-go-mod-strings): New variable. ("parse-go.mod: simple") ("parse-go.mod: comments and unparseable lines") ("parse-go.mod: retract") ("parse-go.mod: raw strings and quoted strings") ("parse-go.mod: complete"): New tests. --- Hello Guix, This one is pretty self-explanatory. This parser handles the full go.mod spec (notably: comments, extra whitespace, retract/exclude) and gracefully handles things it doesn't understand. It is a bit more complex, I suppose. WDYT? -- Sarah guix/import/go.scm | 247 +++++++++++++++++++++++---------------------- tests/go.scm | 132 +++++++++++++++++++++++- 2 files changed, 260 insertions(+), 119 deletions(-) diff --git a/guix/import/go.scm b/guix/import/go.scm index d8f838f635..69e71d01e5 100644 --- a/guix/import/go.scm +++ b/guix/import/go.scm @@ -41,6 +41,7 @@ #:autoload (guix base32) (bytevector->nix-base32-string) #:autoload (guix build utils) (mkdir-p) #:use-module (ice-9 match) + #:use-module (ice-9 peg) #:use-module (ice-9 rdelim) #:use-module (ice-9 receive) #:use-module (ice-9 regex) @@ -244,129 +245,139 @@ and VERSION and return an input port." (go-path-escape version)))) (http-fetch* url))) -(define %go.mod-require-directive-rx - ;; A line in a require directive is composed of a module path and - ;; a version separated by whitespace and an optionnal '//' comment at - ;; the end. - (make-regexp - (string-append - "^[[:blank:]]*([^[:blank:]]+)[[:blank:]]+" ;the module path - "([^[:blank:]]+)" ;the version - "([[:blank:]]+//.*)?"))) ;an optional comment - -(define %go.mod-replace-directive-rx - ;; ReplaceSpec = ModulePath [ Version ] "=>" FilePath newline - ;; | ModulePath [ Version ] "=>" ModulePath Version newline . - (make-regexp - (string-append - "([^[:blank:]]+)" ;the module path - "([[:blank:]]+([^[:blank:]]+))?" ;optional version - "[[:blank:]]+=>[[:blank:]]+" - "([^[:blank:]]+)" ;the file or module path - "([[:blank:]]+([^[:blank:]]+))?"))) ;the version (if a module path) (define (parse-go.mod content) - "Parse the go.mod file CONTENT, returning a list of requirements." - ;; We parse only a subset of https://golang.org/ref/mod#go-mod-file-grammar - ;; which we think necessary for our use case. - (define (toplevel requirements replaced) - "This is the main parser. The results are accumulated in THE REQUIREMENTS -and REPLACED lists." - (let ((line (read-line))) - (cond - ((eof-object? line) - ;; parsing ended, give back the result - (values requirements replaced)) - ((string=? line "require (") - ;; a require block begins, delegate parsing to IN-REQUIRE - (in-require requirements replaced)) - ((string=? line "replace (") - ;; a replace block begins, delegate parsing to IN-REPLACE - (in-replace requirements replaced)) - ((string-prefix? "require " line) - ;; a require directive by itself - (let* ((stripped-line (string-drop line 8))) - (call-with-values - (lambda () - (require-directive requirements replaced stripped-line)) - toplevel))) - ((string-prefix? "replace " line) - ;; a replace directive by itself - (let* ((stripped-line (string-drop line 8))) - (call-with-values - (lambda () - (replace-directive requirements replaced stripped-line)) - toplevel))) - (#t - ;; unrecognised line, ignore silently - (toplevel requirements replaced))))) - - (define (in-require requirements replaced) - (let ((line (read-line))) - (cond - ((eof-object? line) - ;; this should never happen here but we ignore silently - (values requirements replaced)) - ((string=? line ")") - ;; end of block, coming back to toplevel - (toplevel requirements replaced)) - (#t - (call-with-values (lambda () - (require-directive requirements replaced line)) - in-require))))) - - (define (in-replace requirements replaced) - (let ((line (read-line))) - (cond - ((eof-object? line) - ;; this should never happen here but we ignore silently - (values requirements replaced)) - ((string=? line ")") - ;; end of block, coming back to toplevel - (toplevel requirements replaced)) - (#t - (call-with-values (lambda () - (replace-directive requirements replaced line)) - in-replace))))) - - (define (replace-directive requirements replaced line) - "Extract replaced modules and new requirements from the replace directive -in LINE and add them to the REQUIREMENTS and REPLACED lists." - (let* ((rx-match (regexp-exec %go.mod-replace-directive-rx line)) - (module-path (match:substring rx-match 1)) - (version (match:substring rx-match 3)) - (new-module-path (match:substring rx-match 4)) - (new-version (match:substring rx-match 6)) - (new-replaced (cons (list module-path version) replaced)) - (new-requirements - (if (string-match "^\\.?\\./" new-module-path) - requirements - (cons (list new-module-path new-version) requirements)))) - (values new-requirements new-replaced))) - - (define (require-directive requirements replaced line) - "Extract requirement from LINE and augment the REQUIREMENTS and REPLACED -lists." - (let* ((rx-match (regexp-exec %go.mod-require-directive-rx line)) - (module-path (match:substring rx-match 1)) - ;; Double-quoted strings were seen in the wild without escape - ;; sequences; trim the quotes to be on the safe side. - (module-path (string-trim-both module-path #\")) - (version (match:substring rx-match 2))) - (values (cons (list module-path version) requirements) replaced))) - - (with-input-from-string content - (lambda () - (receive (requirements replaced) - (toplevel '() '()) - ;; At last remove the replaced modules from the requirements list. - (remove (lambda (r) - (assoc (car r) replaced)) - requirements))))) + "Parse the go.mod file CONTENT, returning a list of directives, comments, +and unknown lines. Each sublist begins with a symbol (go, module, require, +replace, exclude, retract, comment, or unknown) and is followed by one or more +sublists. Each sublist begins with a symbol (module-path, version, file-path, +comment, or unknown) and is followed by the indicated data." + ;; https://golang.org/ref/mod#go-mod-file-grammar + (define-peg-pattern NL none "\n") + (define-peg-pattern WS none (or " " "\t" "\r")) + (define-peg-pattern => none (and (* WS) "=>")) + (define-peg-pattern punctuation none (or "," "=>" "[" "]" "(" ")")) + (define-peg-pattern comment all + (and (ignore "//") (* WS) (* (and (not-followed-by NL) peg-any)))) + (define-peg-pattern EOL body (and (* WS) (? comment) NL)) + (define-peg-pattern block-start none (and (* WS) "(" EOL)) + (define-peg-pattern block-end none (and (* WS) ")" EOL)) + (define-peg-pattern any-line body + (and (* WS) (* (and (not-followed-by NL) peg-any)) EOL)) + + ;; Strings and identifiers + (define-peg-pattern identifier body + (+ (and (not-followed-by (or NL WS punctuation)) peg-any))) + (define-peg-pattern string-raw body + (and (ignore "`") (+ (and (not-followed-by "`") peg-any)) (ignore "`"))) + (define-peg-pattern string-quoted body + (and (ignore "\"") + (+ (or (and (ignore "\\") peg-any) + (and (not-followed-by "\"") peg-any))) + (ignore "\""))) + (define-peg-pattern string-or-ident body + (and (* WS) (or string-raw string-quoted identifier))) + + (define-peg-pattern version all string-or-ident) + (define-peg-pattern module-path all string-or-ident) + (define-peg-pattern file-path all string-or-ident) + + ;; Non-directive lines + (define-peg-pattern unknown all any-line) + (define-peg-pattern block-line body + (or EOL (and (not-followed-by block-end) unknown))) + + ;; GoDirective = "go" GoVersion newline . + (define-peg-pattern go all (and (ignore "go") version EOL)) + + ;; ModuleDirective = "module" ( ModulePath | "(" newline ModulePath newline ")" ) newline . + (define-peg-pattern module all + (and (ignore "module") (or (and block-start module-path EOL block-end) + (and module-path EOL)))) + + ;; The following directives may all be used solo or in a block + ;; RequireSpec = ModulePath Version newline . + (define-peg-pattern require all (and module-path version EOL)) + (define-peg-pattern require-top body + (and (ignore "require") + (or (and block-start (* (or require block-line)) block-end) require))) + + ;; ExcludeSpec = ModulePath Version newline . + (define-peg-pattern exclude all (and module-path version EOL)) + (define-peg-pattern exclude-top body + (and (ignore "exclude") + (or (and block-start (* (or exclude block-line)) block-end) exclude))) + + ;; ReplaceSpec = ModulePath [ Version ] "=>" FilePath newline + ;; | ModulePath [ Version ] "=>" ModulePath Version newline . + (define-peg-pattern original all (or (and module-path version) module-path)) + (define-peg-pattern with all (or (and module-path version) file-path)) + (define-peg-pattern replace all (and original => with EOL)) + (define-peg-pattern replace-top body + (and (ignore "replace") + (or (and block-start (* (or replace block-line)) block-end) replace))) + + ;; RetractSpec = ( Version | "[" Version "," Version "]" ) newline . + (define-peg-pattern range all + (and (* WS) (ignore "[") version + (* WS) (ignore ",") version (* WS) (ignore "]"))) + (define-peg-pattern retract all (and (or range version) EOL)) + (define-peg-pattern retract-top body + (and (ignore "retract") + (or (and block-start (* (or retract block-line)) block-end) retract))) + + (define-peg-pattern go-mod body + (* (and (* WS) (or go module require-top exclude-top replace-top + retract-top EOL unknown)))) + + (let ((tree (peg:tree (match-pattern go-mod content))) + (keywords '(go module require replace exclude retract comment unknown))) + (keyword-flatten keywords tree))) ;; Prevent inlining of this procedure, which is accessed by unit tests. (set! parse-go.mod parse-go.mod) +(define (go.mod-directives go.mod directive) + "Return the list of top-level directive bodies in GO.MOD matching the symbol +DIRECTIVE." + (filter-map (match-lambda + (((? (cut eq? <> directive) head) . rest) rest) + (_ #f)) + go.mod)) + +(define (go.mod-requirements go.mod) + "Compute and return the list of requirements specified by GO.MOD." + (define (replace directive requirements) + (define (maybe-replace module-path new-requirement) + ;; Do not allow version updates for indirect dependencies + ;; TODO: Is this correct behavior? It's in the go.mod for a reason... + (if (and (equal? module-path (first new-requirement)) + (not (assoc-ref requirements module-path))) + requirements + (cons new-requirement (alist-delete module-path requirements)))) + + (match directive + ((('original ('module-path module-path) . _) with . _) + (match with + (('with ('file-path _) . _) + (alist-delete module-path requirements)) + (('with ('module-path new-module-path) ('version new-version) . _) + (maybe-replace module-path + (list new-module-path new-version))))))) + + (define (require directive requirements) + (match directive + ((('module-path module-path) ('version version) . _) + (cons (list module-path version) requirements)))) + + (let* ((requires (go.mod-directives go.mod 'require)) + (replaces (go.mod-directives go.mod 'replace)) + (requirements (fold require '() requires))) + (fold replace requirements replaces))) + +;; Prevent inlining of this procedure, which is accessed by unit tests. +(set! go.mod-requirements go.mod-requirements) + (define-record-type (%make-vcs url-prefix root-regex type) vcs? @@ -588,7 +599,7 @@ When VERSION is unspecified, the latest version available is used." hint: use one of the following available versions ~a\n" version* available-versions)))) (content (fetch-go.mod goproxy module-path version*)) - (dependencies+versions (parse-go.mod content)) + (dependencies+versions (go.mod-requirements (parse-go.mod content))) (dependencies (if pin-versions? dependencies+versions (map car dependencies+versions))) diff --git a/tests/go.scm b/tests/go.scm index b088ab50d2..b16965d941 100644 --- a/tests/go.scm +++ b/tests/go.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2021 François Joulaud +;;; Copyright © 2021 Sarah Morgensen ;;; ;;; This file is part of GNU Guix. ;;; @@ -31,6 +32,9 @@ #:use-module (srfi srfi-64) #:use-module (web response)) +(define go.mod-requirements + (@@ (guix import go) go.mod-requirements)) + (define parse-go.mod (@@ (guix import go) parse-go.mod)) @@ -96,6 +100,40 @@ replace ( ") +(define fixture-go-mod-unparseable + "module my/thing +go 1.12 // avoid feature X +require other/thing v1.0.2 +// Security issue: CVE-XXXXX +exclude old/thing v1.2.3 +new-directive another/thing yet-another/thing +replace ( + bad/thing v1.4.5 => good/thing v1.4.5 + // Unparseable + bad/thing [v1.4.5, v1.9.7] => good/thing v2.0.0 +) +") + +(define fixture-go-mod-retract + "retract v0.9.1 + +retract ( + v1.9.2 + [v1.0.0, v1.7.9] +) +") + +(define fixture-go-mod-strings + "require `example.com/\"some-repo\"` v1.9.3 +require ( + `example.com/\"another.repo\"` v1.0.0 + \"example.com/special!repo\" v9.3.1 +) +replace \"example.com/\\\"some-repo\\\"\" => `launchpad.net/some-repo` v1.9.3 +replace ( + \"example.com/\\\"another.repo\\\"\" => launchpad.net/another-repo v1.0.0 +) +") (define fixture-latest-for-go-check @@ -185,7 +223,7 @@ require github.com/kr/pretty v0.2.1 (string good/thing v2.0.0")) + (parse-go.mod fixture-go-mod-unparseable)) + +(test-equal "parse-go.mod: retract" + `((retract (version "v0.9.1")) + (retract (version "v1.9.2")) + (retract (range (version "v1.0.0") (version "v1.7.9")))) + (parse-go.mod fixture-go-mod-retract)) + +(test-equal "parse-go.mod: raw strings and quoted strings" + `((require (module-path "example.com/\"some-repo\"") (version "v1.9.3")) + (require (module-path "example.com/\"another.repo\"") (version "v1.0.0")) + (require (module-path "example.com/special!repo") (version "v9.3.1")) + (replace (original (module-path "example.com/\"some-repo\"")) + (with (module-path "launchpad.net/some-repo") (version "v1.9.3"))) + (replace (original (module-path "example.com/\"another.repo\"")) + (with (module-path "launchpad.net/another-repo") (version "v1.0.0")))) + (parse-go.mod fixture-go-mod-strings)) + +(test-equal "parse-go.mod: complete" + `((module (module-path "M")) + (go (version "1.13")) + (replace (original (module-path "github.com/myname/myproject/myapi")) + (with (file-path "./api"))) + (replace (original (module-path "github.com/mymname/myproject/thissdk")) + (with (file-path "../sdk"))) + (replace (original (module-path "launchpad.net/gocheck")) + (with (module-path "github.com/go-check/check") + (version "v0.0.0-20140225173054-eb6ee6f84d0a"))) + (require (module-path "github.com/user/project") + (version "v1.1.11")) + (require (module-path "github.com/user/project/sub/directory") + (version "v1.1.12")) + (require (module-path "bitbucket.org/user/project") + (version "v1.11.20")) + (require (module-path "bitbucket.org/user/project/sub/directory") + (version "v1.11.21")) + (require (module-path "launchpad.net/project") + (version "v1.1.13")) + (require (module-path "launchpad.net/project/series") + (version "v1.1.14")) + (require (module-path "launchpad.net/project/series/sub/directory") + (version "v1.1.15")) + (require (module-path "launchpad.net/~user/project/branch") + (version "v1.1.16")) + (require (module-path "launchpad.net/~user/project/branch/sub/directory") + (version "v1.1.17")) + (require (module-path "hub.jazz.net/git/user/project") + (version "v1.1.18")) + (require (module-path "hub.jazz.net/git/user/project/sub/directory") + (version "v1.1.19")) + (require (module-path "k8s.io/kubernetes/subproject") + (version "v1.1.101")) + (require (module-path "one.example.com/abitrary/repo") + (version "v1.1.111")) + (require (module-path "two.example.com/abitrary/repo") + (version "v0.0.2")) + (require (module-path "quoted.example.com/abitrary/repo") + (version "v0.0.2")) + (replace (original (module-path "two.example.com/abitrary/repo")) + (with (module-path "github.com/corp/arbitrary-repo") + (version "v0.0.2"))) + (replace (original (module-path "golang.org/x/sys")) + (with (module-path "golang.org/x/sys") + (version "v0.0.0-20190813064441-fde4db37ae7a")) + (comment "pinned to release-branch.go1.13")) + (replace (original (module-path "golang.org/x/tools")) + (with (module-path "golang.org/x/tools") + (version "v0.0.0-20190821162956-65e3620a7ae7")) + (comment "pinned to release-branch.go1.13"))) + (parse-go.mod fixture-go-mod-complete)) + ;;; End-to-end tests for (guix import go) (define (mock-http-fetch testcase) (lambda (url . rest) base-commit: 3ee0f170c8bd883728d8abb2c2e00f445c13f17d -- 2.31.1