;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015 Federico Beffa ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (guix import hackage) #:use-module (ice-9 match) #:use-module (ice-9 regex) #:use-module (ice-9 rdelim) #:use-module (ice-9 receive) #:use-module (ice-9 pretty-print) #:use-module (srfi srfi-26) #:use-module (srfi srfi-11) #:use-module (srfi srfi-1) #:use-module ((guix download) #:select (download-to-store)) #:use-module ((guix utils) #:select (package-name->name+version)) #:use-module (guix import utils) #:use-module (guix store) #:use-module (guix hash) #:use-module (guix base32) #:use-module ((guix utils) #:select (call-with-temporary-output-file)) #:export (hackage->guix-package)) ;; (use-modules (ice-9 match)) ;; (use-modules (ice-9 regex)) ;; (use-modules (ice-9 rdelim)) ;; (use-modules (ice-9 receive)) ;; (use-modules (ice-9 pretty-print)) ;; (use-modules (srfi srfi-26)) ;; (use-modules (srfi srfi-11)) ;; (use-modules (srfi srfi-1)) ;; Part 1: ;; ;; Functions used to read a Cabal file and do some pre-processing: discarding ;; comments and empty lines. (define ghc-standard-libraries ;; List of libraries distributed with ghc (7.8.4). '("haskell98" "hoopl" "base" "transformers" "deepseq" "array" "binary" "bytestring" "containers" "time" "cabal" "bin-package-db" "ghc-prim" "integer-gmp" "win32" "template-haskell" "process" "haskeline" "terminfo" "directory" "filepath" "old-locale" "unix" "old-time" "pretty" "xhtml" "hpc")) ;; Libraries present in the "Haskell Platform" 2014.2.0 and not included in ;; the GHC standard libraries: ;; ;; "zlib" ; 0.5.4.1 ;; "async" ; 2.0.1.5 ;; "stm" ; 2.4.2 ;; "mtl" ; 2.1.3.1 ;; "primitive" ; 0.5.2.1 ;; "parallel" ; 3.2.0.4 ;; "attoparsec" ; 0.10.4.0 ;; "case-insensitive" ; 1.1.0.3 ;; "syb" ; 0.4.1 ;; "containers" ; 0.5.5.1 ;; "fgl" ; 5.5.0.1 ;; "unordered-containers" ; 0.2.3.3 ;; "hashable" ; 1.2.1.0 ;; "split" ; 0.2.2 ;; "text" ; 1.1.0.0 ;; "vector" ; 0.10.9.1 ;; "GLURaw" ; 1.4.0.1 ;; "OpenGL" ; 2.9.2.0 ;; "OpenGLRaw" ; 1.5.0.0 ;; "GLUT" ; 2.5.1.1 ;; "haskell-src" ; 1.0.1.6 ;; "network" ; 2.4.2.2 ;; "HTTP" ; 4000.2.10 ;; "random" ; 1.0.1.1 ;; "HUnit" ; 1.2.5.2 ;; "QuickCheck" ; 2.6 ;; "html" ; 1.0.1.2 ;; "parsec" ; 3.1.5 ;; "regex-compat" ; 0.95.1 ;; "regex-base" ; 0.93.2 ;; "regex-posix" ; 0.95.2 (define package-name-prefix "ghc-") (define key-value-rx ;; Regular expression matching "key: value" (make-regexp "([a-zA-Z0-9-]+): *(\\w?.*)$")) (define sections-rx ;; Regular expression matching a section "head sub-head ..." (make-regexp "([a-zA-Z0-9\\(\\)-]+)")) (define comment-rx ;; Regexp matching Cabal comment lines. (make-regexp "^ *--")) (define (has-key? line) "Check if LINE includes a key." (regexp-exec key-value-rx line)) (define (comment-line? line) "Check if LINE is a comment line." (regexp-exec comment-rx line)) (define (line-indentation+rest line) "Returns two results: The number of indentation spaces and the rest of the line (without indentation)." (let loop ((line-lst (string->list line)) (count 0)) (if (or (null? line-lst) (not (eqv? (first line-lst) #\space))) (values count (list->string line-lst)) (loop (cdr line-lst) (+ count 1))))) (define (strip-cabal port) "Read a Cabal file from PORT and filter empty and comment lines. Return a list composed by the remaining lines of the file." (let loop ((line (read-line port)) (result '())) (cond ((eof-object? line) (reverse result)) ((or (string-null? line) (comment-line? line)) (loop (read-line port) result)) (else (loop (read-line port) (cons line result)))))) ;; Part 2: ;; ;; Take the result of part 1 and convert the content of the file in a list of ;; list pairs, where the first list of the pair includes keys while the second ;; is a list of values. (define (multi-line-value lines seed) "Function to read a value split across multiple lines. LINES are the remaining input lines to be read. SEED is the value read on the same line as the key. Return two values: A list with values and the remaining lines to be processed." (if (null? lines) (values '() '()) (let-values (((current-indent value) (line-indentation+rest (first lines))) ((next-line-indent next-line-value) (if (null? (cdr lines)) (values #f "") (line-indentation+rest (second lines))))) (if (or (not next-line-indent) (< next-line-indent current-indent) (regexp-exec condition-rx next-line-value)) (values (reverse (cons value seed)) (cdr lines)) (multi-line-value (cdr lines) (cons value seed)))))) (define (read-cabal lines) "Parses a Cabal file. LINES is a list with each element being a line of a Cabal file, as produced by STRIP-CABAL. Return a list of list pairs: (((head1 sub-head1 ... key1) (value)) ((head2 sub-head2 ... key2) (value2)) ...). We try do deduce the Cabal format from the following document: https://www.haskell.org/cabal/users-guide/developing-packages.html Keys are case-insensitive. We therefore lowercase them. Values are case-sensitive. Currently only indentation-structured files are parsed. Braces structured files are not handled." ;" <- make emacs happy. (let loop ((lines lines) (indents '()) ; only includes indents at start of section heads. (sections '()) (result '())) (let-values (((current-indent line) (if (null? lines) (values 0 "") (line-indentation+rest (first lines)))) ((next-line-indent next-line) (if (or (null? lines) (null? (cdr lines))) (values 0 "") (line-indentation+rest (second lines))))) (if (null? lines) (reverse result) (let ((rx-result (has-key? line))) (cond (rx-result (let ((key (string-downcase (match:substring rx-result 1))) (value (match:substring rx-result 2))) (cond ;; Simple single line "key: value". ((= next-line-indent current-indent) (loop (cdr lines) indents sections (cons (list (reverse (cons key sections)) (list value)) result))) ;; Multi line "key: value\n value cont...". ((> next-line-indent current-indent) (let*-values (((value-lst lines) (multi-line-value (cdr lines) (if (string-null? value) '() `(,value))))) ;; multi-line-value returns to the first line after the ;; multi-value. (loop lines indents sections (cons (list (reverse (cons key sections)) value-lst) result)))) ;; Section ended. (else ;; Indentation is reduced. Check by how many levels. (let* ((idx (+ (list-index (lambda (x) (= next-line-indent x)) indents) (if (has-key? next-line) 1 0))) (sec (drop sections idx)) (ind (drop indents idx))) (loop (cdr lines) ind sec (cons (list (reverse (cons key sections)) (list value)) result))))))) ;; Start of a new section. ((or (null? indents) (> current-indent (first indents))) (loop (cdr lines) (cons current-indent indents) (cons (string-downcase line) sections) result)) (else (loop (cdr lines) indents (cons (string-downcase line) (cdr sections)) result)))))))) (define condition-rx ;; Regexp for conditionals. (make-regexp "^if +(.*)$")) (define (split-section section) "Split SECTION in individual words with exception for the predicate of an 'if' conditional." (let ((rx-result (regexp-exec condition-rx section))) (if rx-result `("if" ,(match:substring rx-result 1)) (map match:substring (list-matches sections-rx section))))) (define (join-sections sec1 sec2) (fold-right cons sec2 sec1)) (define (pre-process-keys key) (match key (() '()) ((sec1 rest ...) (join-sections (split-section sec1) (pre-process-keys rest))))) (define (pre-process-entry-keys entry) (match entry ((key value) (list (pre-process-keys key) value)) (() '()))) (define (pre-process-entries-keys entries) "ENTRIES is a list of list pairs, a keys list and a valules list, as produced by 'read-cabal'. Split each element of the keys list into individual words. This pre-processing is used to read flags." (match entries ((entry rest ...) (cons (pre-process-entry-keys entry) (pre-process-entries-keys rest))) (() '()))) (define (get-flags pre-processed-entries) "PRE-PROCESSED-ENTRIES is a list of list pairs, a keys list and a values list, as produced by 'read-cabal' and pre-processed by 'pre-process-entries-keys'. Return a list of pairs with the name of flags and their default value (one of \"False\" or \"True\") as specified in the Cabal file: ((\"flag1-name\" . \"False-or-True\") ...)." ;" <- make emacs happy (match pre-processed-entries (() '()) (((("flag" flag-name "default") (flag-val)) rest ...) (cons (cons flag-name flag-val) (get-flags rest))) ((entry rest ... ) (get-flags rest)) (_ #f))) ;; Part 3: ;; ;; Functions to read information from the Cabal object created by 'read-cabal' ;; and process dependencies conditionals. (define tests-rx ;; Cabal test keywords (make-regexp "(os|arch|flag|impl)\\(([a-zA-Z0-9_-]+)\\)")) (define parens-rx ;; Parentheses within conditions (make-regexp "\\((.+)\\)")) (define or-rx ;; OR operator in conditions (make-regexp " +\\|\\| +")) (define and-rx ;; AND operator in conditions (make-regexp " +&& +")) (define not-rx ;; NOT operator in conditions (make-regexp "^!.+")) (define (bi-op-args str match-lst) "Return a list with the arguments of (logic) bianry operators. MATCH-LST is the result of 'list-match' against a binary operator regexp on STR." (let ((operators (length match-lst))) (map (lambda (from to) (substring str from to)) (cons 0 (map match:end match-lst)) (append (map match:start match-lst) (list (string-length str)))))) (define (bi-op->sexp-like bi-op args) "BI-OP is a string with the name of a Scheme operator which in a Cabal file is represented by a binary operator. ARGS are the arguments of said operator. Return a string representing an S-expression of the operator applied to its arguments." (if (= (length args) 1) (first args) (string-append "(" bi-op (fold (lambda (arg seed) (string-append seed " " arg)) "" args) ")"))) (define (not->sexp-like arg) "If the string ARG is prefixed by a Cabal negation operator, convert it to an equivalent Scheme S-expression string." (if (regexp-exec not-rx arg) (string-append "(not " (substring arg 1 (string-length arg)) ")") arg)) (define (parens-less-cond->sexp-like conditional) "Convert a Cabal CONDITIONAL string into a string with equivalent Scheme syntax. This procedure accepts only simple conditionals without parentheses." ;; The outher operation is the one with the lowest priority: OR (bi-op->sexp-like "or" ;; each OR argument may be an AND operation (map (lambda (or-arg) (let ((m-lst (list-matches and-rx or-arg))) ;; is there an AND operation? (if (> (length m-lst) 0) (bi-op->sexp-like "and" ;; expand NOT operators when there are ANDs (map not->sexp-like (bi-op-args or-arg m-lst))) ;; ... and when there aren't. (not->sexp-like or-arg)))) ;; list of OR arguments (bi-op-args conditional (list-matches or-rx conditional))))) (define test-keyword-ornament "§§") (define (conditional->sexp-like conditional) "Convert a Cabal CONDITIONAL string into a string with equivalent Scheme syntax." ;; First we substitute TEST-KEYWORD-ORNAMENT for parentheses around tests ;; keywords so that parentheses are only used to set precedences. This ;; substantially simplify parsing. (let ((conditional (regexp-substitute/global #f tests-rx conditional 'pre 1 test-keyword-ornament 2 test-keyword-ornament 'post))) (let loop ((sub-cond conditional)) (let ((rx-result (regexp-exec parens-rx sub-cond))) (cond (rx-result (parens-less-cond->sexp-like (string-append (match:prefix rx-result) (loop (match:substring rx-result 1)) (match:suffix rx-result)))) (else (parens-less-cond->sexp-like sub-cond))))))) (define (eval-flags sexp-like-cond flags) "SEXP-LIKE-COND is a string representing an S-expression conditional. FLAGS is a list of flag name and value pairs as produced by 'get-flags'. Substitute \"#t\" or \"#f\" according to the value of flags. (Default to \"True\")." (fold-right (lambda (flag sexp) (match flag ((name . value) (let ((rx (make-regexp (string-append "flag" test-keyword-ornament name test-keyword-ornament)))) (regexp-substitute/global #f rx sexp 'pre (if (string-ci= value "False") "#f" "#t") 'post))) (_ sexp))) sexp-like-cond (cons '("[a-zA-Z0-9_-]+" . "True") flags))) (define (eval-tests sexp-like-cond) "In the string SEXP-LIKE-COND substitute test keywords \"os(...)\" and \"arch(...)\" with strings representing Scheme functions performing equivalent checks." (with-input-from-string (fold-right (lambda (test sexp) (match test ((type pre-match post-match) (let ((rx (make-regexp (string-append type test-keyword-ornament "(\\w+)" test-keyword-ornament)))) (regexp-substitute/global #f rx sexp 'pre pre-match 2 post-match 'post))) (_ sexp))) sexp-like-cond ;; (%current-system) returns, e.g., "x86_64-linux" or "i686-linux". '(("(os|arch)" "(string-match \"" "\" (%current-system))"))) read)) (define (eval-impl sexp-like-cond) "Check for the Cabal test \"impl(...)\" in the string SEXP-LIKE-COND. Assume the module declaring the generated package includes a local variable called \"haskell-implementation\" with a string value of the form NAME-VERSION against which we compare." (with-output-to-string (lambda () (write (with-input-from-string (fold-right (lambda (test sexp) (match test ((pre-match post-match) (let ((rx-with-version (make-regexp (string-append "impl" test-keyword-ornament "([a-zA-Z0-9_-]+) *([<>=]+) *([0-9.]+) *" test-keyword-ornament))) (rx-without-version (make-regexp (string-append "impl" test-keyword-ornament "(\\w+)" test-keyword-ornament)))) (if (regexp-exec rx-with-version sexp) (regexp-substitute/global #f rx-with-version sexp 'pre pre-match 2 " \"" 1 "-" 3 "\" " post-match 'post) (regexp-substitute/global #f rx-without-version sexp 'pre pre-match "-match \"" 1 "\" " post-match 'post)))) (_ sexp))) sexp-like-cond '(("(string" "haskell-implementation)"))) read))))) (define (key->values meta key) "META is the representation of a Cabal file as produced by 'read-cabal'. Return the list of values associated with a specific KEY (a string)." (match meta (() '()) (((((? (lambda(x) (equal? x key)))) v) r ...) v) (((k v) r ...) (key->values (cdr meta) key)) (_ "key Not fount"))) (define (key-start-end->entries meta key-start key-end) "META is the representation of a Cabal file as produced by 'read-cabal'. Return all entries whose keys list starts with KEY-START and ends with KEY-END." (let ((pred (lambda (x) (equal? (list key-start key-end) (list (first x) (last x)))))) (match meta (() '()) ((((? pred k) v) r ...) (cons `(,k ,v) (key-start-end->entries (cdr meta) key-start key-end))) (((k v) r ...) (key-start-end->entries (cdr meta) key-start key-end)) (_ "key Not fount")))) (define else-rx (make-regexp "^else$")) (define (count-if-else rx-result-ls) (apply + (map (lambda (m) (if m 1 0)) rx-result-ls))) (define (analyze-entry-cond entry) (let* ((keys (first entry)) (vals (second entry)) (rx-cond-result (map (cut regexp-exec condition-rx <>) keys)) (rx-else-result (map (cut regexp-exec else-rx <>) keys)) (cond-no (count-if-else rx-cond-result)) (else-no (count-if-else rx-else-result)) (cond-idx (list-index (lambda (rx) (if rx #t #f)) rx-cond-result)) (else-idx (list-index (lambda (rx) (if rx #t #f)) rx-else-result)) (key-cond (cond ((or (and cond-idx else-idx (< cond-idx else-idx)) (and cond-idx (not else-idx))) (match:substring (receive (head tail) (split-at rx-cond-result cond-idx) (first tail)))) ((or (and cond-idx else-idx (> cond-idx else-idx)) (and (not cond-idx) else-idx)) (match:substring (receive (head tail) (split-at rx-else-result else-idx) (first tail)))) (else "")))) (values keys vals rx-cond-result rx-else-result cond-no else-no key-cond))) (define (remove-cond entry cond) (match entry ((k v) (list (cdr (member cond k)) v)))) (define (group-and-reduce-level entries group group-cond) (let loop ((true-group group) (false-group '()) (entries entries)) (if (null? entries) (values (reverse true-group) (reverse false-group) entries) (let*-values (((entry) (first entries)) ((keys vals rx-cond-result rx-else-result cond-no else-no key-cond) (analyze-entry-cond entry))) (cond ((and (>= (+ cond-no else-no) 1) (string= group-cond key-cond)) (loop (cons (remove-cond entry group-cond) true-group) false-group (cdr entries))) ((and (>= (+ cond-no else-no) 1) (string= key-cond "else")) (loop true-group (cons (remove-cond entry "else") false-group) (cdr entries))) (else (values (reverse true-group) (reverse false-group) entries))))))) (define dependencies-rx (make-regexp "([a-zA-Z0-9_-]+) *[^,]*,?")) (define (hackage-name->package-name name) (if (string-prefix? package-name-prefix name) (string-downcase name) (string-append package-name-prefix (string-downcase name)))) (define (split-dependencies ls) "Split the comma separated list of dependencies LS coming from the Cabal file and return a list with inputs suitable for the Guix package. Currently the version information is discarded." (define (split-at-comma d) (map (lambda (m) (let ((name (hackage-name->package-name (match:substring m 1)))) (list name (list 'unquote (string->symbol name))))) (list-matches dependencies-rx d))) (fold (lambda (d p) (append (split-at-comma d) p)) '() ls)) (define (dependencies-cond->sexp meta) "META is the representation of a Cabal file as produced by 'read-cabal'. Return an S-expression containing the list of dependencies as expected by the 'inputs' field of a package. The generated S-expressions may include conditionals as defined in the cabal file. During this process we discard the version information of the packages." (define (take-dependencies meta) (let ((key-start-exe "executable cabal") (key-start-lib "library") (key-end "build-depends")) (append (key-start-end->entries meta key-start-exe key-end) (key-start-end->entries meta key-start-lib key-end)))) (let ((flags (get-flags (pre-process-entries-keys meta)))) (delete-duplicates (let loop ((entries (take-dependencies meta)) (result '())) (if (null? entries) result (let*-values (((entry) (first entries)) ((keys vals rx-cond-result rx-else-result cond-no else-no key-cond) (analyze-entry-cond entry))) (cond ((= (+ cond-no else-no) 0) (loop (cdr entries) (append (split-dependencies vals) result))) (else (let-values (((true-group false-group entries) (group-and-reduce-level entries '() key-cond)) ((cond-final) (eval-tests (eval-impl (eval-flags (conditional->sexp-like (last (split-section key-cond))) flags))))) (loop entries (cond ((or (eq? cond-final #t) (equal? cond-final '(not #f))) (append (loop true-group '()) result)) ((or (eq? cond-final #f) (equal? cond-final '(not #t))) (append (loop false-group '()) result)) (else (cons `(unquote-splicing (if ,cond-final ,(loop true-group '()) ,(loop false-group '()))) result))))))))))))) (define (standard-library? name) (member name ghc-standard-libraries)) (define (filter-standard-libraries ls) "Filter from list of inputs LS the libraries already included with the Haskell compiler. Currently we imply the use of GHC." (let ((real-name-rx (make-regexp (string-append package-name-prefix "(\\w+)")))) (fold (lambda (elem seed) (if (eq? (first elem) 'unquote-splicing) (let*-values (((group-cond true-group false-group) (match (second elem) ((if c t f ) (values c t f)) (_ (values c '() '())))) ((filtered-true-group) (filter-standard-libraries true-group)) ((filtered-false-group) (filter-standard-libraries false-group))) (cond ((and (null? filtered-false-group) (null? filtered-true-group)) seed) ((null? filtered-false-group) (cons (list 'unquote-splicing (list 'if group-cond filtered-true-group)) seed)) (else (cons (list 'unquote-splicing (list 'if group-cond filtered-true-group filtered-false-group)) seed)))) (let ((rx-result (regexp-exec real-name-rx (first elem)))) (match (match:substring rx-result 1) ((? standard-library?) seed) (_ (cons elem seed)))))) '() ls))) ;; Part 4: ;; ;; Retrive the desired package and its Cabal file from ;; http://hackage.haskell.org and construct the Guix package S-expression. (define (hackage-fetch name-version) "Return the Cabal file for the package NAME-VERSION, or #f on failure. If the version part is omitted from the package name, then return the latest version." (let*-values (((name version) (package-name->name+version name-version)) ((url) (if version (string-append "http://hackage.haskell.org/package/" name "-" version "/" name ".cabal") (string-append "http://hackage.haskell.org/package/" name "/" name ".cabal"))) ((cabal) (call-with-temporary-output-file (lambda (temp port) (and (url-fetch url temp) (call-with-input-file temp strip-cabal)))))) (and=> cabal read-cabal))) (define string->license ;; List of valid values from ;; https://www.haskell.org ;; /cabal/release/cabal-latest/doc/API/Cabal/Distribution-License.html. (match-lambda ("GPL-2" 'gpl2) ("GPL-3" 'gpl3) ("GPL" "'gpl??") ("AGPL-3" 'agpl3) ("AGPL" "'agpl??") ("LGPL-2.1" 'lgpl2.1) ("LGPL-3" 'lgpl3) ("LGPL" "'lgpl??") ("BSD2" 'bsd-2) ("BSD3" 'bsd-3) ("MIT" 'expat) ("ISC" 'isc) ("MPL" 'mpl2.0) ("Apache-2.0" 'asl2.0) ((x) (string->license x)) ((lst ...) `(list ,@(map string->license lst))) (_ #f))) (define (hackage-module->sexp meta) "Return the `package' S-expression for a Cabal package. META is the representation of a Cabal file as produced by 'read-cabal'." (define name (first (key->values meta "name"))) (define version (first (key->values meta "version"))) (define description (let*-values (((description) (key->values meta "description")) ((lines last) (split-at description (- (length description) 1)))) (fold-right (lambda (line seed) (string-append line "\n" seed)) (first last) lines))) (define source-url (string-append "http://hackage.haskell.org/package/" name "/" name "-" version ".tar.gz")) (define (maybe-inputs hackage-name->package-name inputs) (match (filter-standard-libraries inputs) (() '()) ((inputs ...) (list (list hackage-name->package-name (list 'quasiquote inputs)))))) (let ((tarball (with-store store (download-to-store store source-url)))) `(package (name ,(hackage-name->package-name name)) (version ,version) (source (origin (method url-fetch) (uri (string-append ,@(factorize-uri source-url version))) (sha256 (base32 ,(if tarball (bytevector->nix-base32-string (file-sha256 tarball)) "failed to download tar archive"))))) (build-system haskell-build-system) ,@(maybe-inputs 'inputs (dependencies-cond->sexp meta)) (home-page ,@(key->values meta "homepage")) (synopsis ,@(key->values meta "synopsis")) (description ,description) (license ,(string->license (key->values meta "license")))))) (define (hackage->guix-package module-name) "Fetch the Cabal file for PACKAGE-NAME from hackage.haskell.org, and return the `package' S-expression corresponding to that package, or #f on failure." (let ((module-meta (hackage-fetch module-name))) (and=> module-meta hackage-module->sexp))) ;;; cabal.scm ends here