unofficial mirror of guix-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH 1/4] gnu: Add perl-time-piece.
@ 2016-12-03 10:38 Ben Woodcroft
  2016-12-03 10:38 ` [PATCH 2/4] gnu: Add ncbi-tools Ben Woodcroft
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Ben Woodcroft @ 2016-12-03 10:38 UTC (permalink / raw)
  To: guix-devel

* gnu/packages/perl.scm (perl-time-piece): New variable.
---
 gnu/packages/perl.scm | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/gnu/packages/perl.scm b/gnu/packages/perl.scm
index 6b4370d..55982ab 100644
--- a/gnu/packages/perl.scm
+++ b/gnu/packages/perl.scm
@@ -7162,6 +7162,28 @@ time values and formatting dates into ASCII strings.")
 and time() calls.")
     (license (package-license perl))))
 
+(define-public perl-time-piece
+  (package
+    (name "perl-time-piece")
+    (version "1.31")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "mirror://cpan/authors/id/E/ES/ESAYM/Time-Piece-"
+                           version ".tar.gz"))
+       (sha256
+        (base32
+         "1fb7s5y9f3j80h2dfsgplmdcrhp96ccqs0qqabmckkkgvhj40205"))))
+    (build-system perl-build-system)
+    (home-page "http://search.cpan.org/dist/Time-Piece")
+    (synopsis "Object-oriented time objects")
+    (description "This module replaces the standard @code{localtime} and
+@code{gmtime} Perl functions with implementations that return objects.  It does
+so in a backwards compatible manner, so that using
+@code{localtime}/@code{gmtime} in the way documented in perlfunc will still
+return what you expect.")
+    (license (package-license perl))))
+
 (define-public perl-tree-simple
   (package
     (name "perl-tree-simple")
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/4] gnu: Add ncbi-tools.
  2016-12-03 10:38 [PATCH 1/4] gnu: Add perl-time-piece Ben Woodcroft
@ 2016-12-03 10:38 ` Ben Woodcroft
  2016-12-03 19:19   ` Ricardo Wurmus
  2016-12-03 19:30   ` Marius Bakke
  2016-12-03 10:39 ` [PATCH 3/4] gnu: Add barrnap Ben Woodcroft
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Ben Woodcroft @ 2016-12-03 10:38 UTC (permalink / raw)
  To: guix-devel

* gnu/packages/bioinformatics.scm (ncbi-tools): New variable.
---
 gnu/packages/bioinformatics.scm | 62 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 415024f..961f0b2 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -80,6 +80,7 @@
   #:use-module (gnu packages readline)
   #:use-module (gnu packages ruby)
   #:use-module (gnu packages serialization)
+  #:use-module (gnu packages shells)
   #:use-module (gnu packages statistics)
   #:use-module (gnu packages tbb)
   #:use-module (gnu packages tex)
@@ -4340,6 +4341,67 @@ simultaneously.")
        ("ngs-sdk" ,ngs-sdk)))
     (synopsis "Java bindings for NGS SDK")))
 
+(define-public ncbi-tools
+  (package
+    (name "ncbi-tools")
+    (version "20160908")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools/old/"
+                           version "/ncbi.tar.gz"))
+       (file-name (string-append name "-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1252s4fw41w5yalz9b50pvzvkiyjfcgy0isw1qgmg0v66bp49khz"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:tests? #f ; There are no tests.
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         (add-after 'unpack 'patch-sources
+            (lambda _
+              (for-each (lambda (file)
+                          (substitute* file
+                            (("NCBI_MAKE_SHELL = .*")
+                             (string-append
+                              "NCBI_MAKE_SHELL = "
+                              (which "sh")
+                              "\n"))))
+                        (find-files "platform" ".*mk"))
+              (substitute* "make/ln-if-absent"
+                (("set path=\\(/usr/bin /bin\\)") ""))
+              #t))
+         (replace 'build
+            (lambda _
+              (chdir "..")
+              (zero? (system* "ncbi/make/makedis.csh"))))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin"))
+                    (man (string-append out "/share/man/man1")))
+               (for-each (lambda (file)
+                           (install-file
+                            (string-append "ncbi/build/" file) bin)
+                           (install-file
+                            (string-append "ncbi/doc/man/" file ".1") man))
+                         ;; XXX: TODO: Install and test other binaries.
+                         (list "tbl2asn"))
+               #t))))))
+    (native-inputs
+     `(("csh" ,tcsh)
+       ("pkg-config" ,pkg-config)
+       ("coreutils" ,coreutils)))
+    (home-page "https://www.ncbi.nlm.nih.gov/IEB/ToolBox/MainPage/index.html")
+    (synopsis "NCBI-related tools")
+    (description "The United States of America @dfn{National Center for
+Biotechnology Information} (NCBI) Software Development Toolkit is for the
+production and distribution of GenBank, Entrez, BLAST and related NCBI
+services.")
+    (license license:public-domain)))
+
 (define-public ncbi-vdb
   (package
     (name "ncbi-vdb")
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/4] gnu: Add barrnap.
  2016-12-03 10:38 [PATCH 1/4] gnu: Add perl-time-piece Ben Woodcroft
  2016-12-03 10:38 ` [PATCH 2/4] gnu: Add ncbi-tools Ben Woodcroft
@ 2016-12-03 10:39 ` Ben Woodcroft
  2016-12-03 19:20   ` Ricardo Wurmus
  2016-12-03 19:36   ` Marius Bakke
  2016-12-03 10:39 ` [PATCH 4/4] gnu: Add prokka Ben Woodcroft
  2016-12-03 18:55 ` [PATCH 1/4] gnu: Add perl-time-piece Marius Bakke
  3 siblings, 2 replies; 11+ messages in thread
From: Ben Woodcroft @ 2016-12-03 10:39 UTC (permalink / raw)
  To: guix-devel

* gnu/packages/bioinformatics.scm (barrnap): New variable.
---
 gnu/packages/bioinformatics.scm | 67 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 961f0b2..4541791 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -268,6 +268,73 @@ instance, it implements several methods to assess contig-wise read coverage.")
 BAM files.")
     (license license:expat)))
 
+(define-public barrnap
+  (package
+   (name "barrnap")
+   (version "0.7")
+   (source
+    (origin
+      (method url-fetch)
+      (uri (string-append
+            "https://github.com/tseemann/barrnap/archive/"
+            version ".tar.gz"))
+      (file-name (string-append name "-" version ".tar.gz"))
+      (sha256
+       (base32
+        "16y040np76my3y82hgk4yy790smbsk4h8d60d5swlv7ha3i768gg"))
+      (modules '((guix build utils)))
+      ;; Remove pre-built binaries.
+      (snippet '(begin
+                  (delete-file-recursively "binaries")
+                  #t))))
+   (build-system gnu-build-system)
+   (arguments
+    `(#:test-target "test"
+      #:phases
+      (modify-phases %standard-phases
+        (add-after 'unpack 'patch-nhmer-path
+          (lambda* (#:key inputs #:allow-other-keys)
+            (substitute* "bin/barrnap"
+              (("^my \\$NHMMER = .*")
+               (string-append "my $NHMMER = '"
+                              (assoc-ref inputs "hmmer")
+                              "/bin/nhmmer';\n")))
+            #t))
+        (delete 'configure)
+        (delete 'build)
+        (replace 'install
+          (lambda* (#:key outputs #:allow-other-keys)
+            (let* ((out  (assoc-ref outputs "out"))
+                   (bin  (string-append out "/bin"))
+                                        ;(path (getenv "PATH"))
+                   (share (string-append out "/share/barrnap/db"))
+                   (binary "bin/barrnap"))
+              (substitute* binary
+                (("^my \\$DBDIR = .*")
+                 (string-append "my $DBDIR = '" share "';\n")))
+              (install-file binary bin)
+              (mkdir-p share)
+              (copy-recursively "db" share))
+            #t)))))
+   (inputs
+    `(("perl" ,perl)
+      ("hmmer" ,hmmer)))
+   (home-page "https://github.com/tseemann/barrnap")
+   (synopsis "Ribosomal RNA predictor")
+   (description
+    "Barrnap predicts the location of ribosomal RNA genes in genomes.  It
+supports bacteria (5S, 23S, 16S), archaea (5S,5.8S,23S,16S), mitochondria (12S,
+16S) and eukaryotes (5S, 5.8S, 28S, 18S).  It takes FASTA DNA sequence as input,
+and write GFF3 as output.  It uses the NHMMER tool that comes with HMMER for
+HMM searching in RNA:DNA style.")
+   (license (list license:gpl3
+                  ;; The Rfam HMMs are under cc0, and the SILVA-derived HMMs are
+                  ;; academic-only.
+                  license:cc0
+                  (license:non-copyleft
+                   "file:///LICENSE.SILVA"
+                   "See LICENSE.SILVA in the distribution.")))))
+
 (define-public bcftools
   (package
     (name "bcftools")
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/4] gnu: Add prokka.
  2016-12-03 10:38 [PATCH 1/4] gnu: Add perl-time-piece Ben Woodcroft
  2016-12-03 10:38 ` [PATCH 2/4] gnu: Add ncbi-tools Ben Woodcroft
  2016-12-03 10:39 ` [PATCH 3/4] gnu: Add barrnap Ben Woodcroft
@ 2016-12-03 10:39 ` Ben Woodcroft
  2016-12-03 19:47   ` Marius Bakke
  2016-12-03 18:55 ` [PATCH 1/4] gnu: Add perl-time-piece Marius Bakke
  3 siblings, 1 reply; 11+ messages in thread
From: Ben Woodcroft @ 2016-12-03 10:39 UTC (permalink / raw)
  To: guix-devel

* gnu/packages/bioinformatics.scm (prokka): New variable.
---
 gnu/packages/bioinformatics.scm | 110 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 4541791..c4f421c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -62,6 +62,7 @@
   #:use-module (gnu packages image)
   #:use-module (gnu packages imagemagick)
   #:use-module (gnu packages java)
+  #:use-module (gnu packages less)
   #:use-module (gnu packages linux)
   #:use-module (gnu packages logging)
   #:use-module (gnu packages machine-learning)
@@ -3743,6 +3744,115 @@ for sequences to be aligned and then, simultaneously with the alignment,
 predicts the locations of structural units in the sequences.")
     (license license:gpl2+)))
 
+(define-public prokka
+  ;; There has been many commits since the last released version 1.11 so we
+  ;; package from git.
+  (let ((commit "1caf2394850998f89a3782cc8846dc51978faac2"))
+    (package
+      (name "prokka")
+      (version (string-append "1.11-1." (string-take commit 8)))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/tseemann/prokka.git")
+               (commit commit)))
+         (file-name (string-append name "-" version "-checkout"))
+         (sha256
+          (base32
+           "1h8dpmrcfw5z3w9sydgv4439v1wdidyq4nx247fyqahvn88rfsnj"))
+         (modules '((guix build utils)))
+         ;; Remove bundled code.
+         (snippet '(begin
+                     (delete-file-recursively "binaries")
+                     (delete-file-recursively "perl5")
+                     #t))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           (delete 'configure)
+           (replace 'build
+             (lambda _
+               (zero? (system* "bin/prokka" "--setupdb"))))
+           (replace 'install
+             (lambda* (#:key outputs #:allow-other-keys)
+               ;; Set the path to the 'db' to refer to the share directory.
+               (substitute* "bin/prokka"
+                 (("^my \\$DBDIR = .*")
+                  (string-append "my $DBDIR = '"
+                                 (assoc-ref outputs "out")
+                                 "/share/prokka/db';\n")))
+               (let* ((out (assoc-ref outputs "out"))
+                      (bin (string-append out "/bin"))
+                      (prokka (string-append bin "/prokka"))
+                      (share (string-append out "/share/prokka"))
+                      (path (getenv "PATH"))
+                      (perl5lib (getenv "PERL5LIB")))
+                 (mkdir-p share)
+                 (copy-recursively "db" (string-append share "/db"))
+                 (install-file "bin/prokka" bin)
+                 (install-file "bin/prokka-genbank_to_fasta_db" bin)
+                 (wrap-program prokka
+                   `("PATH" ":" prefix
+                     (,(string-append path ":" out "/bin"))))
+                 (for-each
+                  (lambda (binary)
+                    (wrap-program binary
+                      `("PERL5LIB" ":" prefix
+                        (,(string-append perl5lib ":" out
+                                         "/lib/perl5/site_perl")))))
+                  (list prokka (string-append
+                                bin "/prokka-genbank_to_fasta_db"))))
+               #t))
+           (delete 'check)
+           (add-after 'install 'post-install-check
+             (lambda* (#:key inputs #:allow-other-keys)
+               ;; There are no tests, instead we run a sanity check on an
+               ;; entire genome.
+               (zero? (system* "bin/prokka"
+                               "--noanno"
+                               "--outdir" "example-out"
+                               (assoc-ref inputs "example-genome"))))))))
+      (native-inputs
+       `(("example-genome"
+          ,(origin
+             (method url-fetch)
+             (uri "http://www.ebi.ac.uk/ena/data/view/CP002565&display=fasta")
+             (file-name (string-append "ena-genome-CP002565.fasta"))
+             (sha256
+              (base32
+               "0dv3m29kgyssjc96zbmb5khkrk7cy7a66bsjk2ricwc302g5hgfy"))))))
+      (inputs
+       `(("perl" ,perl)
+         ("bioperl" ,bioperl-minimal)
+         ("blast+" ,blast+)
+         ("hmmer" ,hmmer)
+         ("aragorn" ,aragorn)
+         ("prodigal" ,prodigal)
+         ("parallel" ,parallel)
+         ("infernal" ,infernal)
+         ("barrnap" ,barrnap)
+         ("minced" ,minced)
+         ("tbl2asn" ,ncbi-tools)
+         ("grep" ,grep)
+         ("sed" ,sed)
+         ("less" ,less)
+         ("perl-time-piece" ,perl-time-piece)
+         ("perl-xml-simple" ,perl-xml-simple)
+         ("perl-digest-md5" ,perl-digest-md5)))
+      (home-page "http://www.vicbioinformatics.com/software.prokka.shtml")
+      (synopsis "Rapid prokaryotic genome annotation")
+      (description
+       "Prokka is a software tool for the rapid annotation of prokaryotic
+genomes.  It produces GFF3, GBK and SQN files that are ready for editing in
+Sequin and ultimately submitted to Genbank/DDJB/ENA. ")
+      (license (list license:gpl2
+                     ;; Available under various licenses.
+                     (license:non-copyleft
+                      "file://doc"
+                      "See license files in the doc directory."))))))
+
 (define-public proteinortho
   (package
     (name "proteinortho")
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] gnu: Add perl-time-piece.
  2016-12-03 10:38 [PATCH 1/4] gnu: Add perl-time-piece Ben Woodcroft
                   ` (2 preceding siblings ...)
  2016-12-03 10:39 ` [PATCH 4/4] gnu: Add prokka Ben Woodcroft
@ 2016-12-03 18:55 ` Marius Bakke
  3 siblings, 0 replies; 11+ messages in thread
From: Marius Bakke @ 2016-12-03 18:55 UTC (permalink / raw)
  To: Ben Woodcroft, guix-devel

[-- Attachment #1: Type: text/plain, Size: 1573 bytes --]

Ben Woodcroft <donttrustben@gmail.com> writes:

> * gnu/packages/perl.scm (perl-time-piece): New variable.
> ---
>  gnu/packages/perl.scm | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
>
> diff --git a/gnu/packages/perl.scm b/gnu/packages/perl.scm
> index 6b4370d..55982ab 100644
> --- a/gnu/packages/perl.scm
> +++ b/gnu/packages/perl.scm
> @@ -7162,6 +7162,28 @@ time values and formatting dates into ASCII strings.")
>  and time() calls.")
>      (license (package-license perl))))
>  
> +(define-public perl-time-piece
> +  (package
> +    (name "perl-time-piece")
> +    (version "1.31")
> +    (source
> +     (origin
> +       (method url-fetch)
> +       (uri (string-append "mirror://cpan/authors/id/E/ES/ESAYM/Time-Piece-"
> +                           version ".tar.gz"))
> +       (sha256
> +        (base32
> +         "1fb7s5y9f3j80h2dfsgplmdcrhp96ccqs0qqabmckkkgvhj40205"))))
> +    (build-system perl-build-system)
> +    (home-page "http://search.cpan.org/dist/Time-Piece")
> +    (synopsis "Object-oriented time objects")
> +    (description "This module replaces the standard @code{localtime} and
> +@code{gmtime} Perl functions with implementations that return objects.  It does
> +so in a backwards compatible manner, so that using
> +@code{localtime}/@code{gmtime} in the way documented in perlfunc will still
> +return what you expect.")
> +    (license (package-license perl))))

LGTM

> +
>  (define-public perl-tree-simple
>    (package
>      (name "perl-tree-simple")
> -- 
> 2.10.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 487 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] gnu: Add ncbi-tools.
  2016-12-03 10:38 ` [PATCH 2/4] gnu: Add ncbi-tools Ben Woodcroft
@ 2016-12-03 19:19   ` Ricardo Wurmus
  2016-12-03 19:30   ` Marius Bakke
  1 sibling, 0 replies; 11+ messages in thread
From: Ricardo Wurmus @ 2016-12-03 19:19 UTC (permalink / raw)
  To: Ben Woodcroft; +Cc: guix-devel


Hi Ben,

thank you for the patch!

> * gnu/packages/bioinformatics.scm (ncbi-tools): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 62 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)

[…]

> +       #:phases
> +       (modify-phases %standard-phases
> +         (delete 'configure)
> +         (add-after 'unpack 'patch-sources
> +            (lambda _

Indentation here is a little too far.  The opening paren is usually
aligned with the first “d” of “add-after”.

> +              (for-each (lambda (file)
> +                          (substitute* file
> +                            (("NCBI_MAKE_SHELL = .*")
> +                             (string-append
> +                              "NCBI_MAKE_SHELL = "
> +                              (which "sh")
> +                              "\n"))))
> +                        (find-files "platform" ".*mk"))

Here I’d use

    (substitute* (find-files "platform" ".*mk")
      …)

“substitute*” can take a list of files.  That’s nicer than using
“for-each” here.

> +              (substitute* "make/ln-if-absent"
> +                (("set path=\\(/usr/bin /bin\\)") ""))
> +              #t))
> +         (replace 'build
> +            (lambda _

Same as above; it’s a little too far to the right.

> +              (chdir "..")
> +              (zero? (system* "ncbi/make/makedis.csh"))))
> +         (replace 'install
> +           (lambda* (#:key outputs #:allow-other-keys)

And the same here.

> +             (let* ((out (assoc-ref outputs "out"))
> +                    (bin (string-append out "/bin"))
> +                    (man (string-append out "/share/man/man1")))
> +               (for-each (lambda (file)
> +                           (install-file
> +                            (string-append "ncbi/build/" file) bin)
> +                           (install-file
> +                            (string-append "ncbi/doc/man/" file ".1") man))
> +                         ;; XXX: TODO: Install and test other binaries.
> +                         (list "tbl2asn"))
> +               #t))))))
> +    (native-inputs
> +     `(("csh" ,tcsh)
> +       ("pkg-config" ,pkg-config)
> +       ("coreutils" ,coreutils)))
> +    (home-page "https://www.ncbi.nlm.nih.gov/IEB/ToolBox/MainPage/index.html")
> +    (synopsis "NCBI-related tools")
> +    (description "The United States of America @dfn{National Center for
> +Biotechnology Information} (NCBI) Software Development Toolkit is for the
> +production and distribution of GenBank, Entrez, BLAST and related NCBI
> +services.")
> +    (license license:public-domain)))
> +

Okay with these changes.  Thank you!

-- 
Ricardo

GPG: BCA6 89B6 3655 3801 C3C6  2150 197A 5888 235F ACAC
http://elephly.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] gnu: Add barrnap.
  2016-12-03 10:39 ` [PATCH 3/4] gnu: Add barrnap Ben Woodcroft
@ 2016-12-03 19:20   ` Ricardo Wurmus
  2016-12-03 19:36   ` Marius Bakke
  1 sibling, 0 replies; 11+ messages in thread
From: Ricardo Wurmus @ 2016-12-03 19:20 UTC (permalink / raw)
  To: Ben Woodcroft; +Cc: guix-devel


Ben Woodcroft <donttrustben@gmail.com> writes:

> * gnu/packages/bioinformatics.scm (barrnap): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 67 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)

[…]

LGTM.  Thanks!

-- 
Ricardo

GPG: BCA6 89B6 3655 3801 C3C6  2150 197A 5888 235F ACAC
http://elephly.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] gnu: Add ncbi-tools.
  2016-12-03 10:38 ` [PATCH 2/4] gnu: Add ncbi-tools Ben Woodcroft
  2016-12-03 19:19   ` Ricardo Wurmus
@ 2016-12-03 19:30   ` Marius Bakke
  1 sibling, 0 replies; 11+ messages in thread
From: Marius Bakke @ 2016-12-03 19:30 UTC (permalink / raw)
  To: Ben Woodcroft, guix-devel

[-- Attachment #1: Type: text/plain, Size: 4282 bytes --]

Ben Woodcroft <donttrustben@gmail.com> writes:

> * gnu/packages/bioinformatics.scm (ncbi-tools): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 62 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 62 insertions(+)
>
> diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
> index 415024f..961f0b2 100644
> --- a/gnu/packages/bioinformatics.scm
> +++ b/gnu/packages/bioinformatics.scm
> @@ -80,6 +80,7 @@
>    #:use-module (gnu packages readline)
>    #:use-module (gnu packages ruby)
>    #:use-module (gnu packages serialization)
> +  #:use-module (gnu packages shells)
>    #:use-module (gnu packages statistics)
>    #:use-module (gnu packages tbb)
>    #:use-module (gnu packages tex)
> @@ -4340,6 +4341,67 @@ simultaneously.")
>         ("ngs-sdk" ,ngs-sdk)))
>      (synopsis "Java bindings for NGS SDK")))
>  
> +(define-public ncbi-tools
> +  (package
> +    (name "ncbi-tools")
> +    (version "20160908")
> +    (source
> +     (origin
> +       (method url-fetch)
> +       (uri (string-append "ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools/old/"
> +                           version "/ncbi.tar.gz"))

I'm getting a timeout on this, though it works in a browser. However
this server is apparently also available over HTTPS, which is better.

> +       (file-name (string-append name "-" version ".tar.gz"))
> +       (sha256
> +        (base32
> +         "1252s4fw41w5yalz9b50pvzvkiyjfcgy0isw1qgmg0v66bp49khz"))))
> +    (build-system gnu-build-system)
> +    (arguments
> +     `(#:tests? #f ; There are no tests.
> +       #:phases
> +       (modify-phases %standard-phases
> +         (delete 'configure)
> +         (add-after 'unpack 'patch-sources
> +            (lambda _
> +              (for-each (lambda (file)
> +                          (substitute* file
> +                            (("NCBI_MAKE_SHELL = .*")
> +                             (string-append
> +                              "NCBI_MAKE_SHELL = "
> +                              (which "sh")
> +                              "\n"))))
> +                        (find-files "platform" ".*mk"))
> +              (substitute* "make/ln-if-absent"
> +                (("set path=\\(/usr/bin /bin\\)") ""))
> +              #t))
> +         (replace 'build
> +            (lambda _
> +              (chdir "..")
> +              (zero? (system* "ncbi/make/makedis.csh"))))
> +         (replace 'install
> +           (lambda* (#:key outputs #:allow-other-keys)
> +             (let* ((out (assoc-ref outputs "out"))
> +                    (bin (string-append out "/bin"))
> +                    (man (string-append out "/share/man/man1")))
> +               (for-each (lambda (file)
> +                           (install-file
> +                            (string-append "ncbi/build/" file) bin)
> +                           (install-file
> +                            (string-append "ncbi/doc/man/" file ".1") man))
> +                         ;; XXX: TODO: Install and test other binaries.
> +                         (list "tbl2asn"))
> +               #t))))))
> +    (native-inputs
> +     `(("csh" ,tcsh)
> +       ("pkg-config" ,pkg-config)
> +       ("coreutils" ,coreutils)))
> +    (home-page "https://www.ncbi.nlm.nih.gov/IEB/ToolBox/MainPage/index.html")
> +    (synopsis "NCBI-related tools")
> +    (description "The United States of America @dfn{National Center for
> +Biotechnology Information} (NCBI) Software Development Toolkit is for the
> +production and distribution of GenBank, Entrez, BLAST and related NCBI
> +services.")
> +    (license license:public-domain)))

The files in regexp/ are released under a custom license found in
regexp/doc/LICENSE. Also corelib/regex.{c,h} are GPL2+.

algo/blast/core/boost_erf.c is covered by boost1.0, and
connect/parson.{c,h} are MIT. Some files in connect/mitsock seem to be
covered by an early variant of the 2-clause BSD.

Other files have unclear license terms such as those in
corelib/morefiles, though they seem sufficiently free for the
non-copyleft procedure. LGTM with these updates!

> +
>  (define-public ncbi-vdb
>    (package
>      (name "ncbi-vdb")
> -- 
> 2.10.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 487 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] gnu: Add barrnap.
  2016-12-03 10:39 ` [PATCH 3/4] gnu: Add barrnap Ben Woodcroft
  2016-12-03 19:20   ` Ricardo Wurmus
@ 2016-12-03 19:36   ` Marius Bakke
  2016-12-04 15:51     ` Ricardo Wurmus
  1 sibling, 1 reply; 11+ messages in thread
From: Marius Bakke @ 2016-12-03 19:36 UTC (permalink / raw)
  To: Ben Woodcroft, guix-devel

[-- Attachment #1: Type: text/plain, Size: 5087 bytes --]

Ben Woodcroft <donttrustben@gmail.com> writes:

> * gnu/packages/bioinformatics.scm (barrnap): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 67 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
>
> diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
> index 961f0b2..4541791 100644
> --- a/gnu/packages/bioinformatics.scm
> +++ b/gnu/packages/bioinformatics.scm
> @@ -268,6 +268,73 @@ instance, it implements several methods to assess contig-wise read coverage.")
>  BAM files.")
>      (license license:expat)))
>  
> +(define-public barrnap
> +  (package
> +   (name "barrnap")
> +   (version "0.7")
> +   (source
> +    (origin
> +      (method url-fetch)
> +      (uri (string-append
> +            "https://github.com/tseemann/barrnap/archive/"
> +            version ".tar.gz"))
> +      (file-name (string-append name "-" version ".tar.gz"))
> +      (sha256
> +       (base32
> +        "16y040np76my3y82hgk4yy790smbsk4h8d60d5swlv7ha3i768gg"))
> +      (modules '((guix build utils)))
> +      ;; Remove pre-built binaries.
> +      (snippet '(begin
> +                  (delete-file-recursively "binaries")
> +                  #t))))
> +   (build-system gnu-build-system)
> +   (arguments
> +    `(#:test-target "test"
> +      #:phases
> +      (modify-phases %standard-phases
> +        (add-after 'unpack 'patch-nhmer-path
> +          (lambda* (#:key inputs #:allow-other-keys)
> +            (substitute* "bin/barrnap"
> +              (("^my \\$NHMMER = .*")
> +               (string-append "my $NHMMER = '"
> +                              (assoc-ref inputs "hmmer")
> +                              "/bin/nhmmer';\n")))
> +            #t))
> +        (delete 'configure)
> +        (delete 'build)
> +        (replace 'install
> +          (lambda* (#:key outputs #:allow-other-keys)
> +            (let* ((out  (assoc-ref outputs "out"))
> +                   (bin  (string-append out "/bin"))
> +                                        ;(path (getenv "PATH"))
> +                   (share (string-append out "/share/barrnap/db"))
> +                   (binary "bin/barrnap"))
> +              (substitute* binary
> +                (("^my \\$DBDIR = .*")
> +                 (string-append "my $DBDIR = '" share "';\n")))
> +              (install-file binary bin)
> +              (mkdir-p share)
> +              (copy-recursively "db" share))
> +            #t)))))
> +   (inputs
> +    `(("perl" ,perl)
> +      ("hmmer" ,hmmer)))
> +   (home-page "https://github.com/tseemann/barrnap")
> +   (synopsis "Ribosomal RNA predictor")
> +   (description
> +    "Barrnap predicts the location of ribosomal RNA genes in genomes.  It
> +supports bacteria (5S, 23S, 16S), archaea (5S,5.8S,23S,16S), mitochondria (12S,
> +16S) and eukaryotes (5S, 5.8S, 28S, 18S).  It takes FASTA DNA sequence as input,
> +and write GFF3 as output.  It uses the NHMMER tool that comes with HMMER for
> +HMM searching in RNA:DNA style.")
> +   (license (list license:gpl3
> +                  ;; The Rfam HMMs are under cc0, and the SILVA-derived HMMs are
> +                  ;; academic-only.

I don't think we can package the SILVA files. They are very much
non-free according to the license text:

SILVA Terms of Use/License Information

The SILVA database content offered at www.arb-silva.de can be freely browsed
and the corresponding information deployed by all users, independent from
their status (academic or non-academic).

All downloads are free for academic users. They can be used, modified and
redistributed within the academic environment without any limitation.
However, in case of redistribution please make transparent the SILVA Terms
of Use/License Information by linking/referring to this page (see link in
the header of your browser). Academic users are represented by universities
and non-commercial research institutes such as members of the German
Helmholtz Association, Leibniz Association and Max-Planck Society as well as
US National Labs.

Users from NON-ACADEMIC can also directly access all downloads including the
results of the SILVA web-aligner (SINA) but only for limited/temporary use
(only for test purposes). All downloaded files should be deleted latest
after 48 hours. Unauthorized usage beyond test purposes is strictly
prohibited. If you are interested in unlimited usage of the SILVA
databases/services (e.g. the web-aligner) or parts of them within a
non-academic environment, please contact us for more information at
contact(at)arb-silva.de. A non-academic environment is defined by a direct
or indirect commercial interest in the data and includes all industrial
research entities.


> +                  license:cc0
> +                  (license:non-copyleft
> +                   "file:///LICENSE.SILVA"
> +                   "See LICENSE.SILVA in the distribution.")))))
> +
>  (define-public bcftools
>    (package
>      (name "bcftools")
> -- 
> 2.10.2

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 487 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] gnu: Add prokka.
  2016-12-03 10:39 ` [PATCH 4/4] gnu: Add prokka Ben Woodcroft
@ 2016-12-03 19:47   ` Marius Bakke
  0 siblings, 0 replies; 11+ messages in thread
From: Marius Bakke @ 2016-12-03 19:47 UTC (permalink / raw)
  To: Ben Woodcroft, guix-devel

[-- Attachment #1: Type: text/plain, Size: 6319 bytes --]

Ben Woodcroft <donttrustben@gmail.com> writes:

> * gnu/packages/bioinformatics.scm (prokka): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 110 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 110 insertions(+)
>
> diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
> index 4541791..c4f421c 100644
> --- a/gnu/packages/bioinformatics.scm
> +++ b/gnu/packages/bioinformatics.scm
> @@ -62,6 +62,7 @@
>    #:use-module (gnu packages image)
>    #:use-module (gnu packages imagemagick)
>    #:use-module (gnu packages java)
> +  #:use-module (gnu packages less)
>    #:use-module (gnu packages linux)
>    #:use-module (gnu packages logging)
>    #:use-module (gnu packages machine-learning)
> @@ -3743,6 +3744,115 @@ for sequences to be aligned and then, simultaneously with the alignment,
>  predicts the locations of structural units in the sequences.")
>      (license license:gpl2+)))
>  
> +(define-public prokka
> +  ;; There have been many commits since the last released version 1.11 so we
> +  ;; package from git.
> +  (let ((commit "1caf2394850998f89a3782cc8846dc51978faac2"))
> +    (package
> +      (name "prokka")
> +      (version (string-append "1.11-1." (string-take commit 8)))
> +      (source
> +       (origin
> +         (method git-fetch)
> +         (uri (git-reference
> +               (url "https://github.com/tseemann/prokka.git")
> +               (commit commit)))
> +         (file-name (string-append name "-" version "-checkout"))
> +         (sha256
> +          (base32
> +           "1h8dpmrcfw5z3w9sydgv4439v1wdidyq4nx247fyqahvn88rfsnj"))
> +         (modules '((guix build utils)))
> +         ;; Remove bundled code.
> +         (snippet '(begin
> +                     (delete-file-recursively "binaries")
> +                     (delete-file-recursively "perl5")
> +                     #t))))
> +      (build-system gnu-build-system)
> +      (arguments
> +       `(#:phases
> +         (modify-phases %standard-phases
> +           (delete 'configure)
> +           (replace 'build
> +             (lambda _
> +               (zero? (system* "bin/prokka" "--setupdb"))))
> +           (replace 'install
> +             (lambda* (#:key outputs #:allow-other-keys)
> +               ;; Set the path to the 'db' to refer to the share directory.
> +               (substitute* "bin/prokka"
> +                 (("^my \\$DBDIR = .*")
> +                  (string-append "my $DBDIR = '"
> +                                 (assoc-ref outputs "out")
> +                                 "/share/prokka/db';\n")))
> +               (let* ((out (assoc-ref outputs "out"))
> +                      (bin (string-append out "/bin"))
> +                      (prokka (string-append bin "/prokka"))
> +                      (share (string-append out "/share/prokka"))
> +                      (path (getenv "PATH"))
> +                      (perl5lib (getenv "PERL5LIB")))
> +                 (mkdir-p share)
> +                 (copy-recursively "db" (string-append share "/db"))
> +                 (install-file "bin/prokka" bin)
> +                 (install-file "bin/prokka-genbank_to_fasta_db" bin)
> +                 (wrap-program prokka
> +                   `("PATH" ":" prefix
> +                     (,(string-append path ":" out "/bin"))))
> +                 (for-each
> +                  (lambda (binary)
> +                    (wrap-program binary
> +                      `("PERL5LIB" ":" prefix
> +                        (,(string-append perl5lib ":" out
> +                                         "/lib/perl5/site_perl")))))
> +                  (list prokka (string-append
> +                                bin "/prokka-genbank_to_fasta_db"))))
> +               #t))
> +           (delete 'check)
> +           (add-after 'install 'post-install-check
> +             (lambda* (#:key inputs #:allow-other-keys)
> +               ;; There are no tests, instead we run a sanity check on an
> +               ;; entire genome.
> +               (zero? (system* "bin/prokka"
> +                               "--noanno"
> +                               "--outdir" "example-out"
> +                               (assoc-ref inputs "example-genome"))))))))
> +      (native-inputs
> +       `(("example-genome"
> +          ,(origin
> +             (method url-fetch)
> +             (uri "http://www.ebi.ac.uk/ena/data/view/CP002565&display=fasta")
> +             (file-name (string-append "ena-genome-CP002565.fasta"))
> +             (sha256
> +              (base32
> +               "0dv3m29kgyssjc96zbmb5khkrk7cy7a66bsjk2ricwc302g5hgfy"))))))
> +      (inputs
> +       `(("perl" ,perl)
> +         ("bioperl" ,bioperl-minimal)
> +         ("blast+" ,blast+)
> +         ("hmmer" ,hmmer)
> +         ("aragorn" ,aragorn)
> +         ("prodigal" ,prodigal)
> +         ("parallel" ,parallel)
> +         ("infernal" ,infernal)
> +         ("barrnap" ,barrnap)
> +         ("minced" ,minced)
> +         ("tbl2asn" ,ncbi-tools)
> +         ("grep" ,grep)
> +         ("sed" ,sed)
> +         ("less" ,less)
> +         ("perl-time-piece" ,perl-time-piece)
> +         ("perl-xml-simple" ,perl-xml-simple)
> +         ("perl-digest-md5" ,perl-digest-md5)))
> +      (home-page "http://www.vicbioinformatics.com/software.prokka.shtml")
> +      (synopsis "Rapid prokaryotic genome annotation")
> +      (description
> +       "Prokka is a software tool for the rapid annotation of prokaryotic
> +genomes.  It produces GFF3, GBK and SQN files that are ready for editing in
> +Sequin and ultimately submitted to Genbank/DDBJ/ENA. ")
> +      (license (list license:gpl2
> +                     ;; Available under various licenses.
> +                     (license:non-copyleft
> +                      "file://doc"
> +                      "See license files in the doc directory."))))))

Wow, how much did Torsten pay you to package these ;)
It looks fine, but there are few enough licenses to list explicitly.
Prokka is GPL3+, Rfam and Pfam are CC0, while Uniprot and HAMAP can be
covered by a non-copyleft procedure mentioning the CC
Attribution-NoDerivs license.

Thanks for this!

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 487 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] gnu: Add barrnap.
  2016-12-03 19:36   ` Marius Bakke
@ 2016-12-04 15:51     ` Ricardo Wurmus
  0 siblings, 0 replies; 11+ messages in thread
From: Ricardo Wurmus @ 2016-12-04 15:51 UTC (permalink / raw)
  To: Marius Bakke; +Cc: guix-devel


Marius Bakke <mbakke@fastmail.com> writes:

> Ben Woodcroft <donttrustben@gmail.com> writes:
>
>> * gnu/packages/bioinformatics.scm (barrnap): New variable.

[…]

>> +16S) and eukaryotes (5S, 5.8S, 28S, 18S).  It takes FASTA DNA sequence as input,
>> +and write GFF3 as output.  It uses the NHMMER tool that comes with HMMER for
>> +HMM searching in RNA:DNA style.")
>> +   (license (list license:gpl3
>> +                  ;; The Rfam HMMs are under cc0, and the SILVA-derived HMMs are
>> +                  ;; academic-only.
>
> I don't think we can package the SILVA files. They are very much
> non-free according to the license text:

You’re right!  Ben, could you delete them from the package in a snippet?

-- 
Ricardo

GPG: BCA6 89B6 3655 3801 C3C6  2150 197A 5888 235F ACAC
http://elephly.net

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2016-12-04 16:23 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-03 10:38 [PATCH 1/4] gnu: Add perl-time-piece Ben Woodcroft
2016-12-03 10:38 ` [PATCH 2/4] gnu: Add ncbi-tools Ben Woodcroft
2016-12-03 19:19   ` Ricardo Wurmus
2016-12-03 19:30   ` Marius Bakke
2016-12-03 10:39 ` [PATCH 3/4] gnu: Add barrnap Ben Woodcroft
2016-12-03 19:20   ` Ricardo Wurmus
2016-12-03 19:36   ` Marius Bakke
2016-12-04 15:51     ` Ricardo Wurmus
2016-12-03 10:39 ` [PATCH 4/4] gnu: Add prokka Ben Woodcroft
2016-12-03 19:47   ` Marius Bakke
2016-12-03 18:55 ` [PATCH 1/4] gnu: Add perl-time-piece Marius Bakke

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).