From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
To: 47310@debbugs.gnu.org
Cc: Maxim Cournoyer <maxim.cournoyer@gmail.com>
Subject: [bug#47310] [PATCH 2/4] import: go: Improve synopsis and description parsing.
Date: Mon, 22 Mar 2021 00:20:20 -0400 [thread overview]
Message-ID: <20210322042022.17385-2-maxim.cournoyer@gmail.com> (raw)
In-Reply-To: <20210322042022.17385-1-maxim.cournoyer@gmail.com>
* guix/import/go.scm (%strict-tokenizer?): Set parameter to #t.
(go-path-escape): Redefine to prevent inlining.
(http-get*): Replace by ...
(http-fetch*): this ...
(json-fetch*): New procedure.
(go.pkg.dev-info): Use http-fetch*.
(go-package-licenses): Rewrite in terms of go.pkg.dev-info.
(go-package-description): Likewise.
(go-package-synopsis): Likewise.
(fetch-go.mod): Use the memoized http-fetch*.
(parse-go.mod): Adjust to receive content as a string.
(fetch-module-meta-data): Adjust to use http-fetch*.
(go-module->guix-package): Adjust to the modified fetch-go.mod return value.
[inputs]: Use propagated inputs, which is the most common situations for Go
libraries.
[description]: Beautify description.
[licenses]: Do no check for #f. The result of the license parsing is always a
list.
* tests/go.scm: Adjust following above changes.
---
guix/import/go.scm | 213 ++++++++++++++++++++++++++----------------
guix/import/utils.scm | 4 +-
tests/go.scm | 75 +++++++--------
3 files changed, 170 insertions(+), 122 deletions(-)
diff --git a/guix/import/go.scm b/guix/import/go.scm
index 7452b4c903..2ef2d872e8 100644
--- a/guix/import/go.scm
+++ b/guix/import/go.scm
@@ -32,7 +32,7 @@
#:use-module (guix http-client)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix memoization)
- #:autoload (htmlprag) (html->sxml) ;from Guile-Lib
+ #:use-module (htmlprag) ;from Guile-Lib
#:autoload (guix git) (update-cached-checkout)
#:autoload (gcrypt hash) (open-hash-port hash-algorithm sha256)
#:autoload (guix serialization) (write-file)
@@ -42,20 +42,28 @@
#:use-module (ice-9 rdelim)
#:use-module (ice-9 receive)
#:use-module (ice-9 regex)
+ #:use-module (ice-9 textual-ports)
#:use-module ((rnrs io ports) #:select (call-with-port))
#:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-2)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
- #:use-module (sxml xpath)
+ #:use-module (sxml match)
+ #:use-module ((sxml xpath) #:renamer (lambda (s)
+ (if (eq? 'filter s)
+ 'xfilter
+ s)))
#:use-module (web client)
#:use-module (web response)
#:use-module (web uri)
- #:export (go-path-escape
- go-module->guix-package
+ #:export (go-module->guix-package
go-module-recursive-import))
+;;; Parameterize htmlprag to parse valid HTML more reliably.
+(%strict-tokenizer? #t)
+
;;; Commentary:
;;;
;;; (guix import go) attempts to make it easier to create Guix package
@@ -89,6 +97,14 @@
;;; Code:
+(define http-fetch*
+ ;; Like http-fetch, but memoized and returning the body as a string.
+ (memoize (lambda args
+ (call-with-port (apply http-fetch args) get-string-all))))
+
+(define json-fetch*
+ (memoize json-fetch))
+
(define (go-path-escape path)
"Escape a module path by replacing every uppercase letter with an
exclamation mark followed with its lowercase equivalent, as per the module
@@ -98,54 +114,73 @@ https://godoc.org/golang.org/x/mod/module#hdr-Escaped_Paths)."
(string-append "!" (string-downcase (match:substring occurrence))))
(regexp-substitute/global #f "[A-Z]" path 'pre escape 'post))
+;; Prevent inlining of this procedure, which is accessed by unit tests.
+(set! go-path-escape go-path-escape)
+
+(define (go.pkg.dev-info name)
+ (http-fetch* (string-append "https://pkg.go.dev/" name)))
+
(define (go-module-latest-version goproxy-url module-path)
"Fetch the version number of the latest version for MODULE-PATH from the
given GOPROXY-URL server."
- (assoc-ref (json-fetch (format #f "~a/~a/@latest" goproxy-url
- (go-path-escape module-path)))
+ (assoc-ref (json-fetch* (format #f "~a/~a/@latest" goproxy-url
+ (go-path-escape module-path)))
"Version"))
-
(define (go-package-licenses name)
"Retrieve the list of licenses that apply to NAME, a Go package or module
-name (e.g. \"github.com/golang/protobuf/proto\"). The data is scraped from
-the https://pkg.go.dev/ web site."
- (let*-values (((url) (string-append "https://pkg.go.dev/" name
- "?tab=licenses"))
- ((response body) (http-get url))
- ;; Extract the text contained in a h2 child node of any
- ;; element marked with a "License" class attribute.
- ((select) (sxpath `(// (* (@ (equal? (class "License"))))
- h2 // *text*))))
- (and (eq? (response-code response) 200)
- (match (select (html->sxml body))
- (() #f) ;nothing selected
- (licenses licenses)))))
-
-(define (go.pkg.dev-info name)
- (http-get (string-append "https://pkg.go.dev/" name)))
-(define go.pkg.dev-info*
- (memoize go.pkg.dev-info))
+name (e.g. \"github.com/golang/protobuf/proto\")."
+ (let* ((body (go.pkg.dev-info (string-append name "?tab=licenses")))
+ ;; Extract the text contained in a h2 child node of any
+ ;; element marked with a "License" class attribute.
+ (select (sxpath `(// (* (@ (equal? (class "License"))))
+ h2 // *text*))))
+ (select (html->sxml body))))
+
+(define (sxml->texi sxml-node)
+ "A very basic SXML to Texinfo converter which attempts to preserve HTML
+formatting and links as text."
+ (sxml-match sxml-node
+ ((strong ,text)
+ (format #f "@strong{~a}" text))
+ ((a (@ (href ,url)) ,text)
+ (format #f "@url{~a,~a}" url text))
+ ((code ,text)
+ (format #f "@code{~a}" text))
+ (,something-else something-else)))
(define (go-package-description name)
"Retrieve a short description for NAME, a Go package name,
-e.g. \"google.golang.org/protobuf/proto\". The data is scraped from the
-https://pkg.go.dev/ web site."
- (let*-values (((response body) (go.pkg.dev-info* name))
- ;; Extract the text contained in a h2 child node of any
- ;; element marked with a "License" class attribute.
- ((select) (sxpath
- `(// (section
- (@ (equal? (class "Documentation-overview"))))
- (p 1)))))
- (and (eq? (response-code response) 200)
- (match (select (html->sxml body))
- (() #f) ;nothing selected
- (((p . strings))
- ;; The paragraph text is returned as a list of strings embedding
- ;; newline characters. Join them and strip the newline
- ;; characters.
- (string-delete #\newline (string-join strings)))))))
+e.g. \"google.golang.org/protobuf/proto\"."
+ (let* ((body (go.pkg.dev-info name))
+ (sxml (html->sxml body))
+ (overview ((sxpath
+ `(//
+ (* (@ (equal? (class "Documentation-overview"))))
+ (p 1))) sxml))
+ ;; Sometimes, the first paragraph just contains images/links that
+ ;; has only "\n" for text. The following filter is designed to
+ ;; omit it.
+ (contains-text? (lambda (node)
+ (remove string-null?
+ (map string-trim-both
+ (filter (node-typeof? '*text*)
+ (cdr node))))))
+ (select-content (sxpath
+ `(//
+ (* (@ (equal? (class "UnitReadme-content"))))
+ div // p ,(xfilter contains-text?))))
+ ;; Fall-back to use content; this is less desirable as it is more
+ ;; verbose, but not every page has an overview.
+ (description (if (not (null? overview))
+ overview
+ (select-content sxml)))
+ (description* (and (not (null? description))
+ (first description))))
+ (match description*
+ (() #f) ;nothing selected
+ ((p elements ...)
+ (apply string-append (filter string? (map sxml->texi elements)))))))
(define (go-package-synopsis module-name)
"Retrieve a short synopsis for a Go module named MODULE-NAME,
@@ -153,17 +188,17 @@ e.g. \"google.golang.org/protobuf\". The data is scraped from
the https://pkg.go.dev/ web site."
;; Note: Only the *module* (rather than package) page has the README title
;; used as a synopsis on the https://pkg.go.dev web site.
- (let*-values (((response body) (go.pkg.dev-info* module-name))
- ;; Extract the text contained in a h2 child node of any
- ;; element marked with a "License" class attribute.
- ((select) (sxpath
- `(// (div (@ (equal? (class "UnitReadme-content"))))
- // h3 *text*))))
- (and (eq? (response-code response) 200)
- (match (select (html->sxml body))
- (() #f) ;nothing selected
- ((title more ...) ;title is the first string of the list
- (string-trim-both title))))))
+ (let* ((url (string-append "https://pkg.go.dev/" module-name))
+ (body (http-fetch* url))
+ ;; Extract the text contained in a h2 child node of any
+ ;; element marked with a "License" class attribute.
+ (select-title (sxpath
+ `(// (div (@ (equal? (class "UnitReadme-content"))))
+ // h3 *text*))))
+ (match (select-title (html->sxml body))
+ (() #f) ;nothing selected
+ ((title more ...) ;title is the first string of the list
+ (string-trim-both title)))))
(define (list->licenses licenses)
"Given a list of LICENSES mostly following the SPDX conventions, return the
@@ -188,13 +223,13 @@ corresponding Guix license or 'unknown-license!"
'unknown-license!)))
licenses))
-(define (fetch-go.mod goproxy-url module-path version)
- "Fetches go.mod from the given GOPROXY-URL server for the given MODULE-PATH
-and VERSION."
- (let ((url (format #f "~a/~a/@v/~a.mod" goproxy-url
+(define (fetch-go.mod goproxy module-path version)
+ "Fetch go.mod from the given GOPROXY server for the given MODULE-PATH
+and VERSION and return an input port."
+ (let ((url (format #f "~a/~a/@v/~a.mod" goproxy
(go-path-escape module-path)
(go-path-escape version))))
- (http-fetch url)))
+ (http-fetch* url)))
(define %go.mod-require-directive-rx
;; A line in a require directive is composed of a module path and
@@ -215,9 +250,8 @@ and VERSION."
"[[:blank:]]+" "=>" "[[:blank:]]+"
"([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?")))
-(define (parse-go.mod port)
- "Parse the go.mod file accessible via the input PORT, returning a list of
-requirements."
+(define (parse-go.mod content)
+ "Parse the go.mod file CONTENT, returning a list of requirements."
(define-record-type <results>
(make-results requirements replacements)
results?
@@ -228,7 +262,7 @@ requirements."
(define (toplevel results)
"Main parser, RESULTS is a pair of alist serving as accumulator for
all encountered requirements and replacements."
- (let ((line (read-line port)))
+ (let ((line (read-line)))
(cond
((eof-object? line)
;; parsing ended, give back the result
@@ -254,7 +288,7 @@ requirements."
(toplevel results)))))
(define (in-require results)
- (let ((line (read-line port)))
+ (let ((line (read-line)))
(cond
((eof-object? line)
;; this should never happen here but we ignore silently
@@ -266,7 +300,7 @@ requirements."
(in-require (require-directive results line))))))
(define (in-replace results)
- (let ((line (read-line port)))
+ (let ((line (read-line)))
(cond
((eof-object? line)
;; this should never happen here but we ignore silently
@@ -305,7 +339,9 @@ requirements."
(($ <results> requirements replaced)
(make-results (alist-cons module-path version requirements) replaced)))))
- (let ((results (toplevel (make-results '() '()))))
+ (let ((results (with-input-from-string content
+ (lambda _
+ (toplevel (make-results '() '()))))))
(match results
(($ <results> requirements replaced)
;; At last we remove replaced modules from the requirements list
@@ -324,8 +360,10 @@ requirements."
(url-prefix vcs-url-prefix)
(root-regex vcs-root-regex)
(type vcs-type))
+
(define (make-vcs prefix regexp type)
- (%make-vcs prefix (make-regexp regexp) type))
+ (%make-vcs prefix (make-regexp regexp) type))
+
(define known-vcs
;; See the following URL for the official Go equivalent:
;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087
@@ -384,6 +422,14 @@ hence the need to derive this information."
"." "-")
"/" "-"))))
+(define (strip-.git-suffix/maybe repo-url)
+ "Strip a repository URL '.git' suffix from REPO-URL if hosted at GitHub."
+ (match repo-url
+ ((and (? (cut string-prefix? "https://github.com" <>))
+ (? (cut string-suffix? ".git" <>)))
+ (string-drop-right repo-url 4))
+ (_ repo-url)))
+
(define-record-type <module-meta>
(make-module-meta import-prefix vcs repo-root)
module-meta?
@@ -396,21 +442,22 @@ hence the need to derive this information."
because goproxy servers don't currently provide all the information needed to
build a package."
;; <meta name="go-import" content="import-prefix vcs repo-root">
- (let* ((port (http-fetch (format #f "https://~a?go-get=1" module-path)))
+ (let* ((meta-data (http-fetch* (format #f "https://~a?go-get=1" module-path)))
(select (sxpath `(// head (meta (@ (equal? (name "go-import"))))
// content))))
- (match (select (call-with-port port html->sxml))
- (() #f) ;nothing selected
+ (match (select (html->sxml meta-data))
+ (() #f) ;nothing selected
(((content content-text))
(match (string-split content-text #\space)
((root-path vcs repo-url)
- (make-module-meta root-path (string->symbol vcs) repo-url)))))))
+ (make-module-meta root-path (string->symbol vcs)
+ (strip-.git-suffix/maybe repo-url))))))))
-(define (module-meta-data-repo-url meta-data goproxy-url)
+(define (module-meta-data-repo-url meta-data goproxy)
"Return the URL where the fetcher which will be used can download the
source."
(if (member (module-meta-vcs meta-data) '(fossil mod))
- goproxy-url
+ goproxy
(module-meta-repo-root meta-data)))
;; XXX: Copied from (guix scripts hash).
@@ -463,6 +510,9 @@ control system is being used."
(method git-fetch)
(uri (git-reference
(url ,vcs-repo-url)
+ ;; This is done because the version field of the package,
+ ;; which the generated quoted expression refers to, has been
+ ;; stripped of any 'v' prefixed.
(commit ,(if (and plain-version? v-prefixed?)
'(string-append "v" version)
'(go-version->git-ref version)))))
@@ -502,8 +552,8 @@ control system is being used."
(define* (go-module->guix-package module-path #:key
(goproxy-url "https://proxy.golang.org"))
(let* ((latest-version (go-module-latest-version goproxy-url module-path))
- (port (fetch-go.mod goproxy-url module-path latest-version))
- (dependencies (map car (call-with-port port parse-go.mod)))
+ (content (fetch-go.mod goproxy-url module-path latest-version))
+ (dependencies (map car (parse-go.mod content)))
(guix-name (go-module->guix-package-name module-path))
(root-module-path (module-path->repository-root module-path))
;; The VCS type and URL are not included in goproxy information. For
@@ -524,14 +574,17 @@ control system is being used."
(build-system go-build-system)
(arguments
'(#:import-path ,root-module-path))
- ,@(maybe-inputs (map go-module->guix-package-name dependencies))
+ ,@(maybe-propagated-inputs
+ (map go-module->guix-package-name dependencies))
(home-page ,(format #f "https://~a" root-module-path))
(synopsis ,synopsis)
- (description ,description)
- (license ,(match (and=> licenses list->licenses)
- ((license) license)
- ((licenses ...) `(list ,@licenses))
- (x x))))
+ (description ,(and=> description beautify-description))
+ (license ,(match (list->licenses licenses)
+ (() #f) ;unknown license
+ ((license) ;a single license
+ license)
+ ((license ...) ;a list of licenses
+ `(list ,@license)))))
dependencies)))
(define go-module->guix-package* (memoize go-module->guix-package))
diff --git a/guix/import/utils.scm b/guix/import/utils.scm
index e55258e306..21fafad388 100644
--- a/guix/import/utils.scm
+++ b/guix/import/utils.scm
@@ -445,8 +445,8 @@ obtain a node's uniquely identifying \"key\"."
"Return a list of package expressions for PACKAGE-NAME and all its
dependencies, sorted in topological order. For each package,
call (REPO->GUIX-PACKAGE NAME :KEYS version repo), which should return a
-package expression and a list of dependencies; call (GUIX-NAME NAME) to
-obtain the Guix package name corresponding to the upstream name."
+package expression and a list of dependencies; call (GUIX-NAME PACKAGE-NAME)
+to obtain the Guix package name corresponding to the upstream name."
(define-record-type <node>
(make-node name version package dependencies)
node?
diff --git a/tests/go.scm b/tests/go.scm
index 6ab99f508a..fa8fa7a2a6 100644
--- a/tests/go.scm
+++ b/tests/go.scm
@@ -180,13 +180,9 @@ require github.com/kr/pretty v0.2.1
(define (testing-parse-mod name expected input)
(define (inf? p1 p2)
(string<? (car p1) (car p2)))
- (let ((input-port (open-input-string input)))
- (test-equal name
- (sort expected inf?)
- (sort
- ( (@@ (guix import go) parse-go.mod)
- input-port)
- inf?))))
+ (test-equal name
+ (sort expected inf?)
+ (sort ((@@ (guix import go) parse-go.mod) input) inf?)))
(testing-parse-mod "parse-go.mod-simple"
'(("good/thing" . "v1.4.5")
@@ -249,44 +245,43 @@ require github.com/kr/pretty v0.2.1
(test-equal "go-module->guix-package"
'(package
- (name "go-github-com-go-check-check")
- (version "0.0.0-20201130134442-10cb98267c6c")
- (source
- (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/go-check/check.git")
- (commit (go-version->git-ref version))))
- (file-name (git-file-name name version))
- (sha256
- (base32
- "0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5"))))
- (build-system go-build-system)
- (arguments
- (quote (#:import-path "github.com/go-check/check")))
- (inputs
- (quasiquote (("go-github-com-kr-pretty"
- (unquote go-github-com-kr-pretty)))))
- (home-page "https://github.com/go-check/check")
- (synopsis "Instructions")
- (description #f)
- (license license:bsd-2))
+ (name "go-github-com-go-check-check")
+ (version "0.0.0-20201130134442-10cb98267c6c")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/go-check/check")
+ (commit (go-version->git-ref version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5"))))
+ (build-system go-build-system)
+ (arguments
+ '(#:import-path "github.com/go-check/check"))
+ (propagated-inputs
+ `(("go-github-com-kr-pretty" ,go-github-com-kr-pretty)))
+ (home-page "https://github.com/go-check/check")
+ (synopsis "Instructions")
+ (description "Package check is a rich testing extension for Go's testing \
+package.")
+ (license license:bsd-2))
;; Replace network resources with sample data.
(call-with-temporary-directory
(lambda (checkout)
(mock ((web client) http-get
(mock-http-get fixtures-go-check-test))
- (mock ((guix http-client) http-fetch
- (mock-http-fetch fixtures-go-check-test))
- (mock ((guix git) update-cached-checkout
- (lambda* (url #:key ref)
- ;; Return an empty directory and its hash.
- (values checkout
- (nix-base32-string->bytevector
- "0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5")
- #f)))
- (go-module->guix-package "github.com/go-check/check")))))))
+ (mock ((guix http-client) http-fetch
+ (mock-http-fetch fixtures-go-check-test))
+ (mock ((guix git) update-cached-checkout
+ (lambda* (url #:key ref)
+ ;; Return an empty directory and its hash.
+ (values checkout
+ (nix-base32-string->bytevector
+ "0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5")
+ #f)))
+ (go-module->guix-package "github.com/go-check/check")))))))
(test-end "go")
-
--
2.30.1
next prev parent reply other threads:[~2021-03-22 4:31 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-22 4:10 [bug#47310] [PATCH 0/4] Improve parsing of synopsis and description, add pinned versions mode Maxim Cournoyer
2021-03-22 4:20 ` [bug#47310] [PATCH 1/4] import: utils: Refactor maybe-inputs and add maybe-propagated-inputs Maxim Cournoyer
2021-03-22 4:20 ` Maxim Cournoyer [this message]
2021-03-22 4:20 ` [bug#47310] [PATCH 3/4] import: go: Add an option to use pinned versions Maxim Cournoyer
2021-04-10 3:13 ` bug#47310: " Maxim Cournoyer
2021-03-22 4:20 ` [bug#47310] [PATCH 4/4] import: go: Append version to symbol name in the pinned version mode Maxim Cournoyer
2021-04-10 20:54 ` [bug#47310] [PATCH 0/4] go importer: Better synopsis and descriptions, pinned versions mode Ludovic Courtès
2021-04-11 0:57 ` Maxim Cournoyer
[not found] ` <20210414120939.mmvprjaz6636kz4h@noop.avalenn.eu>
[not found] ` <87czusjtpc.fsf@gmail.com>
2021-04-18 10:31 ` go-build-system possible improvments François
2021-04-21 2:00 ` Maxim Cournoyer
2021-04-22 16:36 ` François
2021-04-24 3:30 ` Maxim Cournoyer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210322042022.17385-2-maxim.cournoyer@gmail.com \
--to=maxim.cournoyer@gmail.com \
--cc=47310@debbugs.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/guix.git
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.