1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
| | ;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 Ricardo Wurmus <rekado@elephly.net>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix import cran)
#:use-module (ice-9 match)
#:use-module (ice-9 regex)
#:use-module (srfi srfi-1)
#:use-module (sxml simple)
#:use-module (sxml match)
#:use-module (sxml xpath)
#:use-module (guix http-client)
#:use-module (guix hash)
#:use-module (guix store)
#:use-module (guix base32)
#:use-module ((guix download) #:select (download-to-store))
#:use-module (guix import utils)
#:export (cran->guix-package))
;;; Commentary:
;;;
;;; Generate a package declaration template for the latest version of an R
;;; package on CRAN, using the HTML description downloaded from
;;; cran.r-project.org.
;;;
;;; Code:
(define string->license
(match-lambda
("AGPL-3" 'agpl3+)
("Artistic-2.0" 'artistic2.0)
("Apache License 2.0" 'asl2.0)
("BSD_2_clause" 'bsd-2)
("BSD_3_clause" 'bsd-3)
("GPL-2" 'gpl2+)
("GPL-3" 'gpl3+)
("LGPL-2" 'lgpl2.0+)
("LGPL-2.1" 'lgpl2.1+)
("LGPL-3" 'lgpl3+)
("MIT" 'x11)
((x) (string->license x))
((lst ...) `(list ,@(map string->license lst)))
(_ #f)))
(define (format-inputs names)
"Generate a sorted list of package inputs from a list of package NAMES."
(map (lambda (name)
(list name (list 'unquote (string->symbol name))))
(sort names string-ci<?)))
(define* (maybe-inputs package-inputs #:optional (type 'inputs))
"Given a list of PACKAGE-INPUTS, tries to generate the TYPE field of a
package definition."
(match package-inputs
(()
'())
((package-inputs ...)
`((,type (,'quasiquote ,(format-inputs package-inputs)))))))
(define (table-datum tree label)
"Extract the datum node following a LABEL in the sxml table TREE. Only the
first cell of a table row is considered a label cell."
((node-pos 1)
((sxpath `(xhtml:tr
(xhtml:td 1) ; only first cell can contain label
(equal? ,label)
,(node-parent tree) ; go up to label cell
,(node-parent tree) ; go up to matching row
(xhtml:td 2))) ; select second cell
tree)))
(define %cran-url "http://cran.r-project.org/web/packages/")
(define (cran-fetch name)
"Return an sxml representation of the CRAN page for the R package NAME,
or #f on failure. NAME is case-sensitive."
;; This API always returns the latest release of the module.
(let ((cran-url (string-append %cran-url name)))
(false-if-exception
(xml->sxml (http-fetch cran-url)
#:trim-whitespace? #t
#:namespaces '((xhtml . "http://www.w3.org/1999/xhtml"))
#:default-entity-handler
(lambda (port name)
(case name
((nbsp) " ")
((ge) ">=")
((gt) ">")
((lt) "<")
(else
(format (current-warning-port)
"~a:~a:~a: undefined entitity: ~a\n"
cran-url (port-line port) (port-column port)
name)
(symbol->string name))))))))
(define (cran-sxml->sexp sxml)
"Return the `package' s-expression for a CRAN package from the SXML
representation of the package page."
(define (nodes->text nodeset)
(string-join ((sxpath '(// *text*)) nodeset) " "))
(define (guix-name name)
(if (string-prefix? "r-" name)
(string-downcase name)
(string-append "r-" (string-downcase name))))
(sxml-match-let*
(((*TOP* (xhtml:html
,head
(xhtml:body
(xhtml:h2 ,name-and-synopsis)
(xhtml:p ,description)
,summary
(xhtml:h4 "Downloads:") ,downloads
. ,rest)))
sxml))
(let* ((name (match:prefix (string-match ": " name-and-synopsis)))
(synopsis (match:suffix (string-match ": " name-and-synopsis)))
(version (nodes->text (table-datum summary "Version:")))
(license ((compose string->license nodes->text)
(table-datum summary "License:")))
(home-page (nodes->text ((sxpath '((xhtml:a 1)))
(table-datum summary "URL:"))))
(source-url (string-append "mirror://cran/"
;; Remove double dots, because we want an
;; absolute path.
(regexp-substitute/global
#f "\\.\\./"
(string-join
((sxpath '((xhtml:a 1) @ href *text*))
(table-datum downloads
" Package source: ")))
'pre 'post)))
(tarball (with-store store (download-to-store store source-url)))
(sysdepends (map match:substring
(list-matches
"[^ ]+"
;; Strip off comma and parenthetical
;; expressions.
(regexp-substitute/global
#f "(,|\\([^\\)]+\\))"
(nodes->text (table-datum summary
"SystemRequirements:"))
'pre 'post))))
(imports (map guix-name
((sxpath '(// xhtml:a *text*))
(table-datum summary "Imports:")))))
`(package
(name ,(guix-name name))
(version ,version)
(source (origin
(method url-fetch)
(uri (string-append ,@(factorize-uri source-url version)))
(sha256
(base32
,(bytevector->nix-base32-string (file-sha256 tarball))))))
(build-system r-build-system)
,@(maybe-inputs sysdepends)
,@(maybe-inputs imports 'propagated-inputs)
(home-page ,(if (string-null? home-page)
(string-append %cran-url name)
home-page))
(synopsis ,synopsis)
;; Use double spacing
(description ,(regexp-substitute/global #f "\\. \\b" description
'pre ". " 'post))
(license ,license)))))
(define (cran->guix-package package-name)
"Fetch the metadata for PACKAGE-NAME from cran.r-project.org, and return the
`package' s-expression corresponding to that package, or #f on failure."
(let ((module-meta (cran-fetch package-name)))
(and=> module-meta cran-sxml->sexp)))
|