From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark H Weaver Subject: Re: [PATCH] Add Blast+. Date: Tue, 16 Jun 2015 17:37:09 -0400 Message-ID: <87zj3ze3ne.fsf@netris.org> References: Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable Return-path: Received: from eggs.gnu.org ([2001:4830:134:3::10]:54596) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Z4yY9-0006oO-1T for guix-devel@gnu.org; Tue, 16 Jun 2015 17:37:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Z4yY5-0006vR-JC for guix-devel@gnu.org; Tue, 16 Jun 2015 17:37:40 -0400 Received: from world.peace.net ([50.252.239.5]:37231) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Z4yY5-0006oP-EU for guix-devel@gnu.org; Tue, 16 Jun 2015 17:37:37 -0400 In-Reply-To: (Ricardo Wurmus's message of "Tue, 16 Jun 2015 16:26:01 +0200") List-Id: "Development of GNU Guix and the GNU System distribution." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: guix-devel-bounces+gcggd-guix-devel=m.gmane.org@gnu.org Sender: guix-devel-bounces+gcggd-guix-devel=m.gmane.org@gnu.org To: Ricardo Wurmus Cc: guix-devel Ricardo Wurmus writes: > From 81cbb9bfa523d56c68d5f9f4feed3676edb5a414 Mon Sep 17 00:00:00 2001 > From: Ricardo Wurmus > Date: Tue, 16 Jun 2015 16:24:24 +0200 > Subject: [PATCH] gnu: Add Blast+. > > * gnu/packages/bioinformatics.scm (blast+): New variable. 
> --- > gnu/packages/bioinformatics.scm | 156 ++++++++++++++++++++++++++++++++++= ++++++ > 1 file changed, 156 insertions(+) > > diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatic= s.scm > index ac4c50d..4a55040 100644 > --- a/gnu/packages/bioinformatics.scm > +++ b/gnu/packages/bioinformatics.scm > @@ -31,6 +31,7 @@ > #:use-module (gnu packages base) > #:use-module (gnu packages boost) > #:use-module (gnu packages compression) > + #:use-module (gnu packages cpio) > #:use-module (gnu packages file) > #:use-module (gnu packages java) > #:use-module (gnu packages linux) > @@ -294,6 +295,161 @@ into separate processes; and more.") > (inputs > `(("python2-numpy" ,python2-numpy))))) >=20=20 > +(define-public blast+ > + (package > + (name "blast+") > + (version "2.2.30") > + (source (origin > + (method url-fetch) > + (uri (string-append > + "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+= /" > + version "/ncbi-blast-" version "+-src.tar.gz")) > + (sha256 > + (base32 > + "0h0fj5cpx6zpfwixgx5f5xbr4rn3cnai0x3j7grrg50vr18jvxr6"))= )) > + (build-system gnu-build-system) > + (arguments > + `(;; There are three(!) tests for this massive library, and all fai= l with > + ;; "unparsable timing stats". > + ;; ERR [127] -- [util/regexp] test_pcre.sh (unparsable timin= g stats) > + ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable ti= ming stats) > + ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsabl= e timing stats) > + #:tests? #f Just a guess, but maybe this is because you replaced "/bin/date" with "echo -n 0". How about replacing it with "date -d @0" instead? It would be great to get the tests working, even if we have to disable some of them. Otherwise we have no way of knowing that we're not distributing broken garbage :) > + #:out-of-source? #t > + #:parallel-build? 
#f ; not supported > + #:phases > + (modify-phases %standard-phases > + (add-before > + 'configure 'set-HOME > + ;; $HOME needs to be set at some point during the configure ph= ase > + (lambda _ (setenv "HOME" "/tmp") #t)) > + (add-after > + 'unpack 'enter-dir > + (lambda _ (chdir "c++") #t)) > + (add-after > + 'enter-dir 'fix-build-system > + (lambda _ > + ;; Proceed even though the weird build system says that gene= rated > + ;; files are out of date > + (setenv "NCBICXX_RECONF_POLICY" "warn") > + > + ;; Remove bundled bzip2 and zlib > + (delete-file-recursively "src/util/compress/bzip2") > + (delete-file-recursively "src/util/compress/zlib") > + (substitute* "src/util/compress/Makefile.in" > + (("bzip2 zlib api") "api")) > + > + ;; Remove useless msbuild directory > + (delete-file-recursively "src/build-system/project_tree_buil= der/msbuild") > + > + ;; Some of the files we're patching are > + ;; ISO-8859-1-encoded, so choose it as the default > + ;; encoding so the byte encoding is preserved. > + (with-fluids ((%default-port-encoding #f)) > + (substitute* (find-files "src/build-system" "config.*") "^config" > + (("LN_S=3D/bin/\\$LN_S") (string-append "LN_S=3D" (which= "ln"))) > + (("/bin/sh") (which "bash")) (which "sh") might be better. Bash behaves differently when it's invoked as 'sh'. > + (("^PATH=3D.*") ""))) > + > + ;; fix static and generated shebangs > + (substitute* (find-files "scripts/common/check" "\\.sh") "\\.sh$" > + (("/bin/sh") (which "bash"))) (which "sh") > + > + ;; rewrite "/var/tmp" in check script > + (substitute* "scripts/common/check/check_make_unix.sh" > + (("/var/tmp") (string-append (getcwd) "/build/build"))) Or maybe just "/tmp" ? 
> + > + ;; fix path to "echo" > + (substitute* '("src/build-system/Makefile.rules_with_autodep= .in" > + "src/build-system/Makefile.meta.gmake=3Dno" > + "src/build-system/Makefile.meta_r" > + "src/build-system/Makefile.requirements") > + (("/bin/echo") (which "echo"))) > + > + ;; fix path to "basename" > + (substitute* '("src/build-system/Makefile.in.top") > + (("/usr/bin/basename") (which "basename"))) > + > + ;; fix path to "mv" > + (substitute* '("src/build-system/Makefile.rules_with_autodep= .in" > + "src/build-system/Makefile.meta_p") > + (("/bin/mv") (which "mv"))) > + > + ;; fix path to "rm" > + (substitute* '("src/build-system/Makefile.mk.in" > + "src/build-system/Makefile.meta.in" > + "scripts/common/impl/run_with_lock.sh") > + (("/bin/rm") (which "rm"))) > + > + ;; fix path to "cp" > + (substitute* '("src/build-system/Makefile.configurables.real" > + "src/build-system/Makefile.mk.in" > + "src/build-system/configure" > + "src/build-system/configure.ac" > + "scripts/common/impl/if_diff.sh") > + (("/bin/cp") (which "cp"))) > + > + ;; fix path to "mkdir" > + (substitute* '("src/build-system/Makefile.mk.in" > + "src/build-system/Makefile.meta.in") > + (("/bin/mkdir") (which "mkdir"))) > + > + ;; fix path to "dirname" > + (substitute* '("src/build-system/Makefile.configurables.real" > + "src/build-system/Makefile.meta_p") > + (("/usr/bin/dirname") (which "dirname"))) > + > + ;; make call to "date" deterministic > + (substitute* "src/build-system/Makefile.meta_l" > + (("/bin/date") "echo -n 0")) All of these plus the ones for 'sh' could be combined into something like this: (untested) (define (which* cmd) (cond ((string=3D? cmd "date") ;; make call to "date" deterministic "date -d @0") ((which cmd) =3D> identity) (else (format (current-error-port) "WARNING: Unable to find absolute path for ~s~%" cmd) #f))) =20=20 (substitute* (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd) (or (which* cmd) all))) The definition must be placed at the beginning of a <body>, i.e. 
before any non-definitions within a 'lambda', 'let', or similar forms. In this case it would go just inside the 'lambda' for 'fix-build-system'. I did something similar in the 'wicd' package. > + > + ;; do not reset PATH > + (substitute* (find-files "scripts/common/impl/" "\\.sh") "\\.sh$" > + (("^ *PATH=3D.*") "") > + (("action=3D/bin/") "action=3D") > + (("export PATH") "echo -n 0")) Why "echo -n 0" here? Maybe ":" would be better? It is a no-op built-in command in Bourne shell. > + #t)) > + (replace > + 'configure > + (lambda* (#:key inputs outputs #:allow-other-keys) > + (let ((out (assoc-ref outputs "out")) > + (lib (string-append (assoc-ref outputs "lib") "/lib")) > + (include (string-append (assoc-ref outputs "include") > + "/include/ncbi-tools++"))) How about lining up the initializers of this 'let'? > + ;; The 'configure' script doesn't recognize things like > + ;; '--enable-fast-install'. > + (zero? (system* "./configure.orig" > + (string-append "--with-build-root=3D" (get= cwd) "/build") > + (string-append "--prefix=3D" out) > + (string-append "--libdir=3D" lib) > + (string-append "--includedir=3D" include) > + (string-append "--with-bz2=3D" > + (assoc-ref inputs "bzip2")) > + (string-append "--with-z=3D" > + (assoc-ref inputs "zlib")) > + ;; Each library is built twice by default,= once > + ;; with "-static" in its name, and again > + ;; without. > + "--without-static" > + "--with-dll")))))))) > + (outputs '("out" ; 19 MB > + "lib" ; 203MB > + "include")) ; 32MB > + (inputs > + `(("bzip2" ,bzip2) > + ("zlib" ,zlib))) > + (native-inputs > + `(("cpio" ,cpio))) > + (home-page "http://blast.ncbi.nlm.nih.gov") > + (synopsis "Basic local alignment search tool") > + (description > + "BLAST is a popular method of performing a DNA or protein sequence > +similarity search, using heuristics to produce results quickly. 
It also > +calculates an =E2=80=9Cexpect value=E2=80=9D that estimates how many mat= ches would have > +occurred at a given score by chance, which can aid a user in judging how= much > +confidence to have in an alignment.") > + (license license:public-domain))) > + Is everything in here really in the public domain? I'd guess that in order to make this true, you'd need to remove bzip2 and zlib in a snippet, and even then I'd be doubtful :) Actually, it might be a good idea for us to remove bundled stuff in a snippet whenever possible, since we won't be applying security updates to those things, and it's probably better to remove them than to distribute bundled source code with security holes. Thanks! Mark