Ben Woodcroft writes: > * gnu/packages/bioinformatics.scm (prokka): New variable. > --- > gnu/packages/bioinformatics.scm | 110 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 110 insertions(+) > > diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm > index 4541791..c4f421c 100644 > --- a/gnu/packages/bioinformatics.scm > +++ b/gnu/packages/bioinformatics.scm > @@ -62,6 +62,7 @@ > #:use-module (gnu packages image) > #:use-module (gnu packages imagemagick) > #:use-module (gnu packages java) > + #:use-module (gnu packages less) > #:use-module (gnu packages linux) > #:use-module (gnu packages logging) > #:use-module (gnu packages machine-learning) > @@ -3743,6 +3744,115 @@ for sequences to be aligned and then, simultaneously with the alignment, > predicts the locations of structural units in the sequences.") > (license license:gpl2+))) > > +(define-public prokka > + ;; There has been many commits since the last released version 1.11 so we > + ;; package from git. > + (let ((commit "1caf2394850998f89a3782cc8846dc51978faac2")) > + (package > + (name "prokka") > + (version (string-append "1.11-1." (string-take commit 8))) > + (source > + (origin > + (method git-fetch) > + (uri (git-reference > + (url "https://github.com/tseemann/prokka.git") > + (commit commit))) > + (file-name (string-append name "-" version "-checkout")) > + (sha256 > + (base32 > + "1h8dpmrcfw5z3w9sydgv4439v1wdidyq4nx247fyqahvn88rfsnj")) > + (modules '((guix build utils))) > + ;; Remove bundled code. > + (snippet '(begin > + (delete-file-recursively "binaries") > + (delete-file-recursively "perl5") > + #t)))) > + (build-system gnu-build-system) > + (arguments > + `(#:phases > + (modify-phases %standard-phases > + (delete 'configure) > + (replace 'build > + (lambda _ > + (zero? (system* "bin/prokka" "--setupdb")))) > + (replace 'install > + (lambda* (#:key outputs #:allow-other-keys) > + ;; Set the path to the 'db' to refer to the share directory. 
> + (substitute* "bin/prokka" > + (("^my \\$DBDIR = .*") > + (string-append "my $DBDIR = '" > + (assoc-ref outputs "out") > + "/share/prokka/db';\n"))) > + (let* ((out (assoc-ref outputs "out")) > + (bin (string-append out "/bin")) > + (prokka (string-append bin "/prokka")) > + (share (string-append out "/share/prokka")) > + (path (getenv "PATH")) > + (perl5lib (getenv "PERL5LIB"))) > + (mkdir-p share) > + (copy-recursively "db" (string-append share "/db")) > + (install-file "bin/prokka" bin) > + (install-file "bin/prokka-genbank_to_fasta_db" bin) > + (wrap-program prokka > + `("PATH" ":" prefix > + (,(string-append path ":" out "/bin")))) > + (for-each > + (lambda (binary) > + (wrap-program binary > + `("PERL5LIB" ":" prefix > + (,(string-append perl5lib ":" out > + "/lib/perl5/site_perl"))))) > + (list prokka (string-append > + bin "/prokka-genbank_to_fasta_db")))) > + #t)) > + (delete 'check) > + (add-after 'install 'post-install-check > + (lambda* (#:key inputs #:allow-other-keys) > + ;; There are no tests, instead we run a sanity check on an > + ;; entire genome. > + (zero? 
(system* "bin/prokka" > + "--noanno" > + "--outdir" "example-out" > + (assoc-ref inputs "example-genome")))))))) > + (native-inputs > + `(("example-genome" > + ,(origin > + (method url-fetch) > + (uri "http://www.ebi.ac.uk/ena/data/view/CP002565&display=fasta") > + (file-name (string-append "ena-genome-CP002565.fasta")) > + (sha256 > + (base32 > + "0dv3m29kgyssjc96zbmb5khkrk7cy7a66bsjk2ricwc302g5hgfy")))))) > + (inputs > + `(("perl" ,perl) > + ("bioperl" ,bioperl-minimal) > + ("blast+" ,blast+) > + ("hmmer" ,hmmer) > + ("aragorn" ,aragorn) > + ("prodigal" ,prodigal) > + ("parallel" ,parallel) > + ("infernal" ,infernal) > + ("barrnap" ,barrnap) > + ("minced" ,minced) > + ("tbl2asn" ,ncbi-tools) > + ("grep" ,grep) > + ("sed" ,sed) > + ("less" ,less) > + ("perl-time-piece" ,perl-time-piece) > + ("perl-xml-simple" ,perl-xml-simple) > + ("perl-digest-md5" ,perl-digest-md5))) > + (home-page "http://www.vicbioinformatics.com/software.prokka.shtml") > + (synopsis "Rapid prokaryotic genome annotation") > + (description > + "Prokka is a software tool for the rapid annotation of prokaryotic > +genomes. It produces GFF3, GBK and SQN files that are ready for editing in > +Sequin and ultimately submitted to Genbank/DDJB/ENA. ") > + (license (list license:gpl2 > + ;; Available under various licenses. > + (license:non-copyleft > + "file://doc" > + "See license files in the doc directory.")))))) Wow, how much did Torsten pay you to package these ;) It looks fine, but there are few enough licenses that they should be listed explicitly rather than pointing at the doc directory. Prokka is GPLv3+, Rfam and Pfam are CC0, while UniProt and HAMAP can be covered by a non-copyleft procedure mentioning the CC Attribution-NoDerivs license. Thanks for this!