;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2014 David Thompson
;;; Copyright © 2015 Cyril Roelandt
;;; Copyright © 2015, 2016, 2017 Ludovic Courtès
;;; Copyright © 2017 Mathieu Othacehe
;;; Copyright © 2018 Ricardo Wurmus
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (guix import pypi)
  #:use-module (ice-9 match)
  #:use-module (ice-9 regex)
  #:use-module (ice-9 receive)
  #:use-module ((ice-9 rdelim) #:select (read-line))
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:use-module (srfi srfi-34)
  #:use-module (srfi srfi-35)
  #:use-module (guix ui)
  #:use-module (guix utils)
  #:use-module ((guix build utils)
                #:select ((package-name->name+version
                           . hyphen-package-name->name+version)
                          find-files))
  #:use-module (guix import utils)
  #:use-module ((guix download) #:prefix download:)
  #:use-module (guix import json)
  #:use-module (guix packages)
  #:use-module (guix upstream)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix build-system python)
  #:export (parse-requires.txt
            parse-wheel-metadata
            specification->requirement-name
            guix-package->pypi-name
            pypi-recursive-import
            pypi->guix-package
            %pypi-updater))

(define (pypi-fetch name)
  "Return an alist representation of the PyPI metadata for the package NAME,
or #f on failure."
  (json-fetch-alist (string-append "https://pypi.org/pypi/" name "/json")))

;; For packages found on PyPI that lack a source distribution.
(define-condition-type &missing-source-error &error
  missing-source-error?
  (package  missing-source-error-package))

(define (latest-source-release pypi-package)
  "Return the latest source release for PYPI-PACKAGE.  Raise a
'&missing-source-error' condition when the latest version has no \"sdist\"
release."
  (let ((releases (assoc-ref* pypi-package "releases"
                              (assoc-ref* pypi-package "info" "version"))))
    (or (find (lambda (release)
                (string=? "sdist" (assoc-ref release "packagetype")))
              releases)
        (raise (condition (&missing-source-error
                           (package pypi-package)))))))

(define (latest-wheel-release pypi-package)
  "Return the wheel release (an alist, like the one returned by
'latest-source-release') for the latest version of PYPI-PACKAGE, or #f if
there isn't any."
  (let ((releases (assoc-ref* pypi-package "releases"
                              (assoc-ref* pypi-package "info" "version"))))
    ;; 'find' already returns #f when nothing matches.
    (find (lambda (release)
            (string=? "bdist_wheel" (assoc-ref release "packagetype")))
          releases)))

(define (python->package-name name)
  "Given the NAME of a package on PyPI, return a Guix-compliant name for the
package."
  (if (string-prefix? "python-" name)
      (snake-case name)
      (string-append "python-" (snake-case name))))

(define (guix-package->pypi-name package)
  "Given a Python PACKAGE built from pypi.org, return the name of the
package on PyPI."
  (define (url->pypi-name url)
    (hyphen-package-name->name+version
     (basename (file-sans-extension url))))

  (match (and=> (package-source package) origin-uri)
    ((? string? url)
     (url->pypi-name url))
    ((lst ...)
     (any url->pypi-name lst))
    (#f #f)))

(define (wheel-url->extracted-directory wheel-url)
  "Return the name of the '.dist-info' directory for WHEEL-URL, built from
the first two hyphen-separated components of its base name."
  (match (string-split (basename wheel-url) #\-)
    ((name version _ ...)
     (string-append name "-" version ".dist-info"))))

(define (maybe-inputs package-inputs)
  "Given a list of PACKAGE-INPUTS, tries to generate the 'inputs' field of a
package definition."
  (match package-inputs
    (()
     '())
    ((package-inputs ...)
     `((propagated-inputs (,'quasiquote ,package-inputs))))))

(define %requirement-name-regexp
  ;; Regexp to match the requirement name in a requirement specification.

  ;; Some grammar, taken from PEP-0508 (see:
  ;; https://www.python.org/dev/peps/pep-0508/).

  ;; The unified rule can be expressed as:
  ;; specification = wsp* ( url_req | name_req ) wsp*

  ;; where url_req is:
  ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?

  ;; and where name_req is:
  ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?

  ;; Thus, we need only matching NAME, which is expressed as:
  ;; identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
  ;; identifier    = letterOrDigit identifier_end*
  ;; name          = identifier
  (let* ((letter-or-digit "[A-Za-z0-9]")
         (identifier-end (string-append "(" letter-or-digit "|"
                                        "[-_.]*" letter-or-digit ")"))
         (identifier (string-append "^" letter-or-digit identifier-end "*"))
         (name identifier))
    (make-regexp name)))

(define (specification->requirement-name spec)
  "Given a specification SPEC, return the requirement name.  Raise an error
when SPEC does not start with a valid name."
  (match:substring
   (or (regexp-exec %requirement-name-regexp spec)
       (error (G_ "Could not extract requirement name in spec:") spec))))

(define (parse-requires.txt requires.txt)
  "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of
requirement names."
  ;; This is a very incomplete parser, whose job is to select the non-optional
  ;; dependencies and strip them out of any version information.
  ;; Alternatively, we could implement a PEG parser with the (ice-9 peg)
  ;; library and the requirements grammar defined by PEP-0508
  ;; (https://www.python.org/dev/peps/pep-0508/).

  (define (blank? line)
    ;; Return #t if the given LINE is empty or made only of whitespace.
    (string-null? (string-trim line)))

  (define (comment? line)
    ;; Return #t if the given LINE is a comment, #f otherwise.  Using
    ;; 'string-prefix?' rather than 'string-ref' keeps this safe on lines
    ;; that are empty once trimmed.
    (string-prefix? "#" (string-trim line)))

  (define (section-header? line)
    ;; Return #t if the given LINE is a section header, #f otherwise.
    (string-prefix? "[" (string-trim line)))

  (call-with-input-file requires.txt
    (lambda (port)
      (let loop ((result '()))
        (let ((line (read-line port)))
          (cond
           ;; Stop when a section is encountered, as sections contain
           ;; optional (extra) requirements.  Non-optional requirements must
           ;; appear before any section is defined.
           ((or (eof-object? line) (section-header? line))
            ;; Duplicates can occur, since the same requirement can be
            ;; listed multiple times with different conditional markers, e.g.
            ;; pytest >= 3 ; python_version >= "3.3"
            ;; pytest < 3 ; python_version < "3.3"
            (reverse (delete-duplicates result)))
           ((or (blank? line) (comment? line))
            (loop result))
           (else
            ;; The name regexp is anchored at the start of the string, so
            ;; drop any leading whitespace first.
            (loop (cons (specification->requirement-name (string-trim line))
                        result)))))))))

(define (parse-wheel-metadata metadata)
  "Given METADATA, a Wheel metadata file, return a list of requirement names."
  ;; METADATA is a RFC-2822-like, header based file.

  (define (requires-dist-header? line)
    ;; Return #t if the given LINE is a Requires-Dist header.
    (string-prefix? "Requires-Dist: " line))

  (define (requires-dist-value line)
    (string-drop line (string-length "Requires-Dist: ")))

  (define (extra? line)
    ;; Return #t if the given LINE is an "extra" requirement.
    (regexp-match? (string-match "extra == " line)))

  (call-with-input-file metadata
    (lambda (port)
      (let loop ((requirements '()))
        (let ((line (read-line port)))
          ;; The whole file is scanned; 'Requires-Dist' headers carrying an
          ;; "extra == ..." marker denote optional requirements and are
          ;; skipped.
          (cond
           ((eof-object? line)
            (reverse (delete-duplicates requirements)))
           ((and (requires-dist-header? line) (not (extra? line)))
            (loop (cons (specification->requirement-name
                         (requires-dist-value line))
                        requirements)))
           (else
            (loop requirements))))))))

(define (guess-requirements source-url wheel-url archive)
  "Given SOURCE-URL, WHEEL-URL and a ARCHIVE of the package, return a list
of the required packages specified in the requirements.txt file.  ARCHIVE will
be extracted in a temporary directory."

  (define (archive-root-directory url)
    ;; Given the URL of the package's archive, return the name of the directory
    ;; that will be created upon decompressing it.  If the filetype is not
    ;; supported, return #f.
    (if (compressed-file? url)
        (let ((root-directory (file-sans-extension (basename url))))
          (if (string=? "tar" (file-extension root-directory))
              (file-sans-extension root-directory)
              root-directory))
        (begin
          (warning (G_ "Unsupported archive format (~a): \
cannot determine package dependencies") (file-extension url))
          #f)))

  (define (read-wheel-metadata wheel-archive)
    ;; Given WHEEL-ARCHIVE, a ZIP Python wheel archive, return the package's
    ;; requirements, or #f if the metadata file contained therein couldn't be
    ;; extracted.
    (let* ((dirname (wheel-url->extracted-directory wheel-url))
           (metadata (string-append dirname "/METADATA")))
      (call-with-temporary-directory
       (lambda (dir)
         (if (zero? (system* "unzip" "-q" wheel-archive "-d" dir metadata))
             (parse-wheel-metadata (string-append dir "/" metadata))
             (begin
               (warning
                (G_ "Failed to extract file: ~a from wheel.~%") metadata)
               #f))))))

  (define (guess-requirements-from-wheel)
    ;; Return the package's requirements using the wheel, or #f if there is
    ;; no wheel or an error occurs.  No temporary file is created when
    ;; WHEEL-URL is #f.
    (and wheel-url
         (call-with-temporary-output-file
          (lambda (temp port)
            (and (url-fetch wheel-url temp)
                 (read-wheel-metadata temp))))))

  (define (guess-requirements-from-source)
    ;; Return the package's requirements by guessing them from the source.
    (let ((dirname (archive-root-directory source-url))
          (extension (file-extension source-url)))
      (if (string? dirname)
          (call-with-temporary-directory
           (lambda (dir)
             (let* ((pypi-name
                     ;; DIRNAME normally looks like "NAME-VERSION"; when it
                     ;; contains no hyphen, 'string-rindex' returns #f, so
                     ;; fall back to the whole directory name.
                     (or (and=> (string-rindex dirname #\-)
                                (cut string-take dirname <>))
                         dirname))
                    (requires.txt (string-append dirname "/" pypi-name
                                                 ".egg-info" "/requires.txt"))
                    (exit-code
                     ;; Silence the extraction tools.
                     (parameterize ((current-error-port
                                     (%make-void-port "rw+"))
                                    (current-output-port
                                     (%make-void-port "rw+")))
                       (if (string=? "zip" extension)
                           (system* "unzip" archive "-d" dir requires.txt)
                           (system* "tar" "xf" archive "-C" dir
                                    requires.txt)))))
               (if (zero? exit-code)
                   (parse-requires.txt (string-append dir "/" requires.txt))
                   (begin
                     (warning
                      (G_ "Failed to extract file: ~a from source.~%")
                      requires.txt)
                     '())))))
          '())))

  ;; First, try to compute the requirements using the wheel, else, fallback to
  ;; reading the "requires.txt" from the egg-info directory from the source
  ;; tarball.
  (or (guess-requirements-from-wheel)
      (guess-requirements-from-source)))

(define (compute-inputs source-url wheel-url archive)
  "Given the SOURCE-URL of an already downloaded ARCHIVE, return a list of
name/variable pairs describing the required inputs of this package.  Also
return the unaltered list of upstream dependency names."
  (let ((dependencies
         (remove (cut string=? "argparse" <>)
                 (guess-requirements source-url wheel-url archive))))
    (values (sort
             (map (lambda (input)
                    (let ((guix-name (python->package-name input)))
                      (list guix-name
                            (list 'unquote (string->symbol guix-name)))))
                  dependencies)
             (lambda args
               (match args
                 (((a _ ...) (b _ ...))
                  (string-ci<? a b)))))
            dependencies)))

(define (make-pypi-sexp name version source-url wheel-url home-page synopsis
                        description license)
  "Return the `package' s-expression for a python package with the given NAME,
VERSION, SOURCE-URL, WHEEL-URL, HOME-PAGE, SYNOPSIS, DESCRIPTION, and LICENSE.
As a second value, return the list of upstream dependency names.  Return #f
when SOURCE-URL could not be downloaded."
  (call-with-temporary-output-file
   (lambda (temp port)
     (and (url-fetch source-url temp)
          (receive (input-package-names upstream-dependency-names)
              (compute-inputs source-url wheel-url temp)
            (values
             `(package
                (name ,(python->package-name name))
                (version ,version)
                (source (origin
                          (method url-fetch)

                          ;; Sometimes 'pypi-uri' doesn't quite work due to mixed
                          ;; cases in NAME, for instance, as is the case with
                          ;; "uwsgi".  In that case, fall back to a full URL.
                          (uri (pypi-uri ,(string-downcase name) version))
                          (sha256
                           (base32
                            ,(guix-hash-url temp)))))
                (build-system python-build-system)
                ,@(maybe-inputs input-package-names)
                (home-page ,home-page)
                (synopsis ,synopsis)
                (description ,description)
                (license ,(license->symbol license)))
             upstream-dependency-names))))))

(define pypi->guix-package
  (memoize
   (lambda* (package-name)
     "Fetch the metadata for PACKAGE-NAME from pypi.org, and return the
`package' s-expression corresponding to that package, or #f on failure."
     (let ((package (pypi-fetch package-name)))
       (and package
            (guard (c ((missing-source-error? c)
                       (let ((package (missing-source-error-package c)))
                         (leave (G_ "no source release for pypi package ~a ~a~%")
                                (assoc-ref* package "info" "name")
                                (assoc-ref* package "info" "version")))))
              (let ((name (assoc-ref* package "info" "name"))
                    (version (assoc-ref* package "info" "version"))
                    (release (assoc-ref (latest-source-release package) "url"))
                    ;; There may be no wheel at all; keep #f in that case.
                    (wheel (and=> (latest-wheel-release package)
                                  (cut assoc-ref <> "url")))
                    (synopsis (assoc-ref* package "info" "summary"))
                    (description (assoc-ref* package "info" "summary"))
                    (home-page (assoc-ref* package "info" "home_page"))
                    (license (string->license
                              (assoc-ref* package "info" "license"))))
                (make-pypi-sexp name version release wheel home-page synopsis
                                description license))))))))

(define (pypi-recursive-import package-name)
  "Import PACKAGE-NAME from PyPI through 'recursive-import', using
'pypi->guix-package' to import each package and 'python->package-name' to
compute Guix names."
  (recursive-import package-name #f
                    #:repo->guix-package (lambda (name repo)
                                           (pypi->guix-package name))
                    #:guix-name python->package-name))

(define (string->license str)
  "Convert the string STR into a license object, or #f when STR is not one of
the recognized license names."
  (match str
    ("GNU LGPL" license:lgpl2.0)
    ("GPL" license:gpl3)
    ((or "BSD" "BSD License") license:bsd-3)
    ((or "MIT" "MIT license" "Expat license") license:expat)
    ("Public domain" license:public-domain)
    ((or "Apache License, Version 2.0" "Apache 2.0") license:asl2.0)
    (_ #f)))

(define (pypi-package? package)
  "Return true if PACKAGE is a Python package from PyPI."

  (define (pypi-url? url)
    ;; The "https://pypi.org/" prefix also covers "https://pypi.org/packages".
    (or (string-prefix? "https://pypi.org/" url)
        (string-prefix? "https://pypi.python.org/" url)))

  (let ((source-url (and=> (package-source package) origin-uri))
        (fetch-method (and=> (package-source package) origin-method)))
    (and (eq? fetch-method download:url-fetch)
         (match source-url
           ((? string?)
            (pypi-url? source-url))
           ((source-url ...)
            (any pypi-url? source-url))))))

(define (latest-release package)
  "Return an <upstream-source> for the latest release of PACKAGE, or #f when
the PyPI metadata cannot be fetched or the latest version has no source
release."
  (let* ((pypi-name (guix-package->pypi-name package))
         (pypi-package (pypi-fetch pypi-name)))
    (and pypi-package
         (guard (c ((missing-source-error? c) #f))
           (let* ((version (assoc-ref* pypi-package "info" "version"))
                  (url (assoc-ref (latest-source-release pypi-package)
                                  "url")))
             (upstream-source
              (package (package-name package))
              (version version)
              (urls (list url))))))))

(define %pypi-updater
  (upstream-updater
   (name 'pypi)
   (description "Updater for PyPI packages")
   (pred pypi-package?)
   (latest latest-release)))