unofficial mirror of guix-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] Add Blast+.
@ 2015-06-16 14:26 Ricardo Wurmus
  2015-06-16 21:37 ` Mark H Weaver
  0 siblings, 1 reply; 8+ messages in thread
From: Ricardo Wurmus @ 2015-06-16 14:26 UTC (permalink / raw)
  To: guix-devel

[-- Attachment #1: 0001-gnu-Add-Blast.patch --]
[-- Type: text/x-patch, Size: 8356 bytes --]

From 81cbb9bfa523d56c68d5f9f4feed3676edb5a414 Mon Sep 17 00:00:00 2001
From: Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
Date: Tue, 16 Jun 2015 16:24:24 +0200
Subject: [PATCH] gnu: Add Blast+.

* gnu/packages/bioinformatics.scm (blast+): New variable.
---
 gnu/packages/bioinformatics.scm | 156 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index ac4c50d..4a55040 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -31,6 +31,7 @@
   #:use-module (gnu packages base)
   #:use-module (gnu packages boost)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages cpio)
   #:use-module (gnu packages file)
   #:use-module (gnu packages java)
   #:use-module (gnu packages linux)
@@ -294,6 +295,161 @@ into separate processes; and more.")
     (inputs
      `(("python2-numpy" ,python2-numpy)))))
 
+(define-public blast+
+  (package
+    (name "blast+")
+    (version "2.2.30")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
+                    version "/ncbi-blast-" version "+-src.tar.gz"))
+              (sha256
+               (base32
+                "0h0fj5cpx6zpfwixgx5f5xbr4rn3cnai0x3j7grrg50vr18jvxr6"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(;; There are three(!) tests for this massive library, and all fail with
+       ;; "unparsable timing stats".
+       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
+       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
+       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
+       #:tests? #f
+       #:out-of-source? #t
+       #:parallel-build? #f ; not supported
+       #:phases
+       (modify-phases %standard-phases
+         (add-before
+          'configure 'set-HOME
+          ;; $HOME needs to be set at some point during the configure phase
+          (lambda _ (setenv "HOME" "/tmp") #t))
+         (add-after
+          'unpack 'enter-dir
+          (lambda _ (chdir "c++") #t))
+         (add-after
+          'enter-dir 'fix-build-system
+          (lambda _
+            ;; Proceed even though the weird build system says that generated
+            ;; files are out of date
+            (setenv "NCBICXX_RECONF_POLICY" "warn")
+
+            ;; Remove bundled bzip2 and zlib
+            (delete-file-recursively "src/util/compress/bzip2")
+            (delete-file-recursively "src/util/compress/zlib")
+            (substitute* "src/util/compress/Makefile.in"
+              (("bzip2 zlib api") "api"))
+
+            ;; Remove useless msbuild directory
+            (delete-file-recursively "src/build-system/project_tree_builder/msbuild")
+
+            ;; Some of the files we're patching are
+            ;; ISO-8859-1-encoded, so choose it as the default
+            ;; encoding so the byte encoding is preserved.
+            (with-fluids ((%default-port-encoding #f))
+              (substitute* (find-files "src/build-system" "config.*")
+                (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
+                (("/bin/sh") (which "bash"))
+                (("^PATH=.*") "")))
+
+            ;; fix static and generated shebangs
+            (substitute* (find-files "scripts/common/check" "\\.sh")
+              (("/bin/sh") (which "bash")))
+
+            ;; rewrite "/var/tmp" in check script
+            (substitute* "scripts/common/check/check_make_unix.sh"
+              (("/var/tmp") (string-append (getcwd) "/build/build")))
+
+            ;; fix path to "echo"
+            (substitute* '("src/build-system/Makefile.rules_with_autodep.in"
+                           "src/build-system/Makefile.meta.gmake=no"
+                           "src/build-system/Makefile.meta_r"
+                           "src/build-system/Makefile.requirements")
+              (("/bin/echo") (which "echo")))
+
+            ;; fix path to "basename"
+            (substitute* '("src/build-system/Makefile.in.top")
+              (("/usr/bin/basename") (which "basename")))
+
+            ;; fix path to "mv"
+            (substitute* '("src/build-system/Makefile.rules_with_autodep.in"
+                           "src/build-system/Makefile.meta_p")
+              (("/bin/mv") (which "mv")))
+
+            ;; fix path to "rm"
+            (substitute* '("src/build-system/Makefile.mk.in"
+                           "src/build-system/Makefile.meta.in"
+                           "scripts/common/impl/run_with_lock.sh")
+              (("/bin/rm") (which "rm")))
+
+            ;; fix path to "cp"
+            (substitute* '("src/build-system/Makefile.configurables.real"
+                           "src/build-system/Makefile.mk.in"
+                           "src/build-system/configure"
+                           "src/build-system/configure.ac"
+                           "scripts/common/impl/if_diff.sh")
+              (("/bin/cp") (which "cp")))
+
+            ;; fix path to "mkdir"
+            (substitute* '("src/build-system/Makefile.mk.in"
+                           "src/build-system/Makefile.meta.in")
+              (("/bin/mkdir") (which "mkdir")))
+
+            ;; fix path to "dirname"
+            (substitute* '("src/build-system/Makefile.configurables.real"
+                           "src/build-system/Makefile.meta_p")
+              (("/usr/bin/dirname") (which "dirname")))
+
+            ;; make call to "date" deterministic
+            (substitute* "src/build-system/Makefile.meta_l"
+              (("/bin/date") "echo -n 0"))
+
+            ;; do not reset PATH
+            (substitute* (find-files "scripts/common/impl/" "\\.sh")
+              (("^ *PATH=.*") "")
+              (("action=/bin/") "action=")
+              (("export PATH") "echo -n 0"))
+            #t))
+         (replace
+          'configure
+          (lambda* (#:key inputs outputs #:allow-other-keys)
+            (let ((out (assoc-ref outputs "out"))
+                  (lib (string-append (assoc-ref outputs "lib") "/lib"))
+                  (include (string-append (assoc-ref outputs "include")
+                                          "/include/ncbi-tools++")))
+              ;; The 'configure' script doesn't recognize things like
+              ;; '--enable-fast-install'.
+              (zero? (system* "./configure.orig"
+                              (string-append "--with-build-root=" (getcwd) "/build")
+                              (string-append "--prefix=" out)
+                              (string-append "--libdir=" lib)
+                              (string-append "--includedir=" include)
+                              (string-append "--with-bz2="
+                                             (assoc-ref inputs "bzip2"))
+                              (string-append "--with-z="
+                                             (assoc-ref inputs "zlib"))
+                              ;; Each library is built twice by default, once
+                              ;; with "-static" in its name, and again
+                              ;; without.
+                              "--without-static"
+                              "--with-dll"))))))))
+    (outputs '("out"       ; 19 MB
+               "lib"       ; 203MB
+               "include")) ; 32MB
+    (inputs
+     `(("bzip2" ,bzip2)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("cpio" ,cpio)))
+    (home-page "http://blast.ncbi.nlm.nih.gov")
+    (synopsis "Basic local alignment search tool")
+    (description
+     "BLAST is a popular method of performing a DNA or protein sequence
+similarity search, using heuristics to produce results quickly.  It also
+calculates an “expect value” that estimates how many matches would have
+occurred at a given score by chance, which can aid a user in judging how much
+confidence to have in an alignment.")
+    (license license:public-domain)))
+
 (define-public bowtie
   (package
     (name "bowtie")
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-16 14:26 [PATCH] Add Blast+ Ricardo Wurmus
@ 2015-06-16 21:37 ` Mark H Weaver
  2015-06-23  8:06   ` Ricardo Wurmus
  0 siblings, 1 reply; 8+ messages in thread
From: Mark H Weaver @ 2015-06-16 21:37 UTC (permalink / raw)
  To: Ricardo Wurmus; +Cc: guix-devel

Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de> writes:

> From 81cbb9bfa523d56c68d5f9f4feed3676edb5a414 Mon Sep 17 00:00:00 2001
> From: Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
> Date: Tue, 16 Jun 2015 16:24:24 +0200
> Subject: [PATCH] gnu: Add Blast+.
>
> * gnu/packages/bioinformatics.scm (blast+): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 156 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 156 insertions(+)
>
> diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
> index ac4c50d..4a55040 100644
> --- a/gnu/packages/bioinformatics.scm
> +++ b/gnu/packages/bioinformatics.scm
> @@ -31,6 +31,7 @@
>    #:use-module (gnu packages base)
>    #:use-module (gnu packages boost)
>    #:use-module (gnu packages compression)
> +  #:use-module (gnu packages cpio)
>    #:use-module (gnu packages file)
>    #:use-module (gnu packages java)
>    #:use-module (gnu packages linux)
> @@ -294,6 +295,161 @@ into separate processes; and more.")
>      (inputs
>       `(("python2-numpy" ,python2-numpy)))))
>  
> +(define-public blast+
> +  (package
> +    (name "blast+")
> +    (version "2.2.30")
> +    (source (origin
> +              (method url-fetch)
> +              (uri (string-append
> +                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
> +                    version "/ncbi-blast-" version "+-src.tar.gz"))
> +              (sha256
> +               (base32
> +                "0h0fj5cpx6zpfwixgx5f5xbr4rn3cnai0x3j7grrg50vr18jvxr6"))))
> +    (build-system gnu-build-system)
> +    (arguments
> +     `(;; There are three(!) tests for this massive library, and all fail with
> +       ;; "unparsable timing stats".
> +       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
> +       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
> +       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
> +       #:tests? #f

Just a guess, but maybe this is because you replaced "/bin/date" with
"echo -n 0".  How about replacing it with "date -d @0" instead?

It would be great to get the tests working, even if we have to disable
some of them.  Otherwise we have no way of knowing that we're not
distributing broken garbage :)

> +       #:out-of-source? #t
> +       #:parallel-build? #f ; not supported
> +       #:phases
> +       (modify-phases %standard-phases
> +         (add-before
> +          'configure 'set-HOME
> +          ;; $HOME needs to be set at some point during the configure phase
> +          (lambda _ (setenv "HOME" "/tmp") #t))
> +         (add-after
> +          'unpack 'enter-dir
> +          (lambda _ (chdir "c++") #t))
> +         (add-after
> +          'enter-dir 'fix-build-system
> +          (lambda _
> +            ;; Proceed even though the weird build system says that generated
> +            ;; files are out of date
> +            (setenv "NCBICXX_RECONF_POLICY" "warn")
> +
> +            ;; Remove bundled bzip2 and zlib
> +            (delete-file-recursively "src/util/compress/bzip2")
> +            (delete-file-recursively "src/util/compress/zlib")
> +            (substitute* "src/util/compress/Makefile.in"
> +              (("bzip2 zlib api") "api"))
> +
> +            ;; Remove useless msbuild directory
> +            (delete-file-recursively "src/build-system/project_tree_builder/msbuild")
> +
> +            ;; Some of the files we're patching are
> +            ;; ISO-8859-1-encoded, so choose it as the default
> +            ;; encoding so the byte encoding is preserved.
> +            (with-fluids ((%default-port-encoding #f))
> +              (substitute* (find-files "src/build-system" "config.*")

"^config"

> +                (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
> +                (("/bin/sh") (which "bash"))

(which "sh") might be better.  Bash behaves differently when it's
invoked as 'sh'.

> +                (("^PATH=.*") "")))
> +
> +            ;; fix static and generated shebangs
> +            (substitute* (find-files "scripts/common/check" "\\.sh")

"\\.sh$"

> +              (("/bin/sh") (which "bash")))

(which "sh")

> +
> +            ;; rewrite "/var/tmp" in check script
> +            (substitute* "scripts/common/check/check_make_unix.sh"
> +              (("/var/tmp") (string-append (getcwd) "/build/build")))

Or maybe just "/tmp" ?

> +
> +            ;; fix path to "echo"
> +            (substitute* '("src/build-system/Makefile.rules_with_autodep.in"
> +                           "src/build-system/Makefile.meta.gmake=no"
> +                           "src/build-system/Makefile.meta_r"
> +                           "src/build-system/Makefile.requirements")
> +              (("/bin/echo") (which "echo")))
> +
> +            ;; fix path to "basename"
> +            (substitute* '("src/build-system/Makefile.in.top")
> +              (("/usr/bin/basename") (which "basename")))
> +
> +            ;; fix path to "mv"
> +            (substitute* '("src/build-system/Makefile.rules_with_autodep.in"
> +                           "src/build-system/Makefile.meta_p")
> +              (("/bin/mv") (which "mv")))
> +
> +            ;; fix path to "rm"
> +            (substitute* '("src/build-system/Makefile.mk.in"
> +                           "src/build-system/Makefile.meta.in"
> +                           "scripts/common/impl/run_with_lock.sh")
> +              (("/bin/rm") (which "rm")))
> +
> +            ;; fix path to "cp"
> +            (substitute* '("src/build-system/Makefile.configurables.real"
> +                           "src/build-system/Makefile.mk.in"
> +                           "src/build-system/configure"
> +                           "src/build-system/configure.ac"
> +                           "scripts/common/impl/if_diff.sh")
> +              (("/bin/cp") (which "cp")))
> +
> +            ;; fix path to "mkdir"
> +            (substitute* '("src/build-system/Makefile.mk.in"
> +                           "src/build-system/Makefile.meta.in")
> +              (("/bin/mkdir") (which "mkdir")))
> +
> +            ;; fix path to "dirname"
> +            (substitute* '("src/build-system/Makefile.configurables.real"
> +                           "src/build-system/Makefile.meta_p")
> +              (("/usr/bin/dirname") (which "dirname")))
> +
> +            ;; make call to "date" deterministic
> +            (substitute* "src/build-system/Makefile.meta_l"
> +              (("/bin/date") "echo -n 0"))

All of these plus the ones for 'sh' could be combined into something
like this: (untested)

  (define (which* cmd)
    (cond ((string=? cmd "date")
           ;; make call to "date" deterministic
           "date -d @0")
          ((which cmd)
           => identity)
          (else
           (format (current-error-port)
                   "WARNING: Unable to find absolute path for ~s~%"
                   cmd)
           #f)))
  
  (substitute* <file-list>
    (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
     (or (which* cmd) all)))

The definition must be placed at the beginning of a <body>, i.e. before
any non-definitions within a 'lambda', 'let', or similar form.  In this
case it would go just inside the 'lambda' for 'fix-build-system'.

I did something similar in the 'wicd' package.

> +
> +            ;; do not reset PATH
> +            (substitute* (find-files "scripts/common/impl/" "\\.sh")

"\\.sh$"

> +              (("^ *PATH=.*") "")
> +              (("action=/bin/") "action=")
> +              (("export PATH") "echo -n 0"))

Why "echo -n 0" here?  Maybe ":" would be better?  It is a no-op
built-in command in Bourne shell.

> +            #t))
> +         (replace
> +          'configure
> +          (lambda* (#:key inputs outputs #:allow-other-keys)
> +            (let ((out (assoc-ref outputs "out"))
> +                  (lib (string-append (assoc-ref outputs "lib") "/lib"))
> +                  (include (string-append (assoc-ref outputs "include")
> +                                          "/include/ncbi-tools++")))

How about lining up the initializers of this 'let'?

> +              ;; The 'configure' script doesn't recognize things like
> +              ;; '--enable-fast-install'.
> +              (zero? (system* "./configure.orig"
> +                              (string-append "--with-build-root=" (getcwd) "/build")
> +                              (string-append "--prefix=" out)
> +                              (string-append "--libdir=" lib)
> +                              (string-append "--includedir=" include)
> +                              (string-append "--with-bz2="
> +                                             (assoc-ref inputs "bzip2"))
> +                              (string-append "--with-z="
> +                                             (assoc-ref inputs "zlib"))
> +                              ;; Each library is built twice by default, once
> +                              ;; with "-static" in its name, and again
> +                              ;; without.
> +                              "--without-static"
> +                              "--with-dll"))))))))
> +    (outputs '("out"       ; 19 MB
> +               "lib"       ; 203MB
> +               "include")) ; 32MB
> +    (inputs
> +     `(("bzip2" ,bzip2)
> +       ("zlib" ,zlib)))
> +    (native-inputs
> +     `(("cpio" ,cpio)))
> +    (home-page "http://blast.ncbi.nlm.nih.gov")
> +    (synopsis "Basic local alignment search tool")
> +    (description
> +     "BLAST is a popular method of performing a DNA or protein sequence
> +similarity search, using heuristics to produce results quickly.  It also
> +calculates an “expect value” that estimates how many matches would have
> +occurred at a given score by chance, which can aid a user in judging how much
> +confidence to have in an alignment.")
> +    (license license:public-domain)))
> +

Is everything in here really in the public domain?  I'd guess that in
order to make this true, you'd need to remove bzip2 and zlib in a
snippet, and even then I'd be doubtful :)

Actually, it might be a good idea for us to remove bundled stuff in a
snippet whenever possible, since we won't be applying security updates
to those things, and it's probably better to remove them than to
distribute bundled source code with security holes.

     Thanks!
       Mark

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-16 21:37 ` Mark H Weaver
@ 2015-06-23  8:06   ` Ricardo Wurmus
  2015-06-27 10:14     ` Ludovic Courtès
  2015-06-27 18:29     ` Mark H Weaver
  0 siblings, 2 replies; 8+ messages in thread
From: Ricardo Wurmus @ 2015-06-23  8:06 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guix-devel

[-- Attachment #1: Type: text/plain, Size: 3343 bytes --]

Hi Mark,

thank you for the review!

>> +     `(;; There are three(!) tests for this massive library, and all fail with
>> +       ;; "unparsable timing stats".
>> +       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
>> +       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
>> +       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
>> +       #:tests? #f
>
> Just a guess, but maybe this is because you replaced "/bin/date" with
> "echo -n 0".  How about replacing it with "date -d @0" instead?

I tried that but I still get the same problem.  The test script is
generated from a template in scripts/common/check/check_make_unix.sh.
We are replacing "/var/tmp" with "/tmp" and rewriting the "(/usr)/bin"
prefix, but this should not have an impact on the functionality of the
generated script.

I'm not sure why it's failing -- it works fine in "guix environment".
It's very time-consuming to recompile the whole thing until the tests
are reached.

> It would be great to get the tests working, even if we have to disable
> some of them.  Otherwise we have no way of knowing that we're not
> distributing broken garbage :)

It's hard to know in this case even with the tests, because it's only
three tests and they seem hardly representative of the library.

>> +
>> +            ;; rewrite "/var/tmp" in check script
>> +            (substitute* "scripts/common/check/check_make_unix.sh"
>> +              (("/var/tmp") (string-append (getcwd) "/build/build")))
>
> Or maybe just "/tmp" ?

Yes, that also works.

> All of these plus the ones for 'sh' could be combined into something
> like this: (untested)

[...]

This works great!  Thank you.

>> +              (("^ *PATH=.*") "")
>> +              (("action=/bin/") "action=")
>> +              (("export PATH") "echo -n 0"))
>
> Why "echo -n 0" here?  Maybe ":" would be better?  It is a no-op
> built-in command in Bourne shell.

Yes, this works.  I needed to replace it with a no-op because "export
PATH" is also found in the middle of a long chain of commands, so
replacing it with the empty string results in a syntax error.

> Is everything in here really in the public domain?  I'd guess that in
> order to make this true, you'd need to remove bzip2 and zlib in a
> snippet, and even then I'd be doubtful :)

I've moved the code to remove the bundled stuff to a snippet.

The NCBI code was released into the public domain.  However, it appears
that some third-party headers and some build scripts are under
different licenses:

  * Expat:
    * ncbi-blast-2.2.30+-src/c++/include/util/bitset/
    * ncbi-blast-2.2.30+-src/c++/src/html/ncbi_menu*.js
  * Boost license:
    * ncbi-blast-2.2.30+-src/c++/include/util/impl/floating_point_comparison.hpp
  * LGPL 2+:
    * ncbi-blast-2.2.30+-src/c++/include/dbapi/driver/odbc/unix_odbc/
  * ASL 2.0:
    * ncbi-blast-2.2.30+-src/c++/src/corelib/teamcity_*

I could not find mention of any other licenses.  Is this correct, then:

    ;; Most of the sources are in the public domain, with the following exceptions:
    ;; ...(the above list)...
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))

What do you think?

~~ Ricardo


[-- Attachment #2: 0001-gnu-Add-Blast.patch --]
[-- Type: text/x-patch, Size: 8586 bytes --]

From 8d131f66ba0378738e5b837f78c411edb241d35a Mon Sep 17 00:00:00 2001
From: Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
Date: Tue, 16 Jun 2015 16:24:24 +0200
Subject: [PATCH] gnu: Add Blast+.

* gnu/packages/bioinformatics.scm (blast+): New variable.
---
 gnu/packages/bioinformatics.scm | 155 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 3defac8..4603329 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -31,6 +31,7 @@
   #:use-module (gnu packages base)
   #:use-module (gnu packages boost)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages cpio)
   #:use-module (gnu packages file)
   #:use-module (gnu packages java)
   #:use-module (gnu packages linux)
@@ -258,6 +259,160 @@ into separate processes; and more.")
     (inputs
      `(("python2-numpy" ,python2-numpy)))))
 
+(define-public blast+
+  (package
+    (name "blast+")
+    (version "2.2.30")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append
+                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
+                    version "/ncbi-blast-" version "+-src.tar.gz"))
+              (sha256
+               (base32
+                "0h0fj5cpx6zpfwixgx5f5xbr4rn3cnai0x3j7grrg50vr18jvxr6"))
+              (modules '((guix build utils)))
+              (snippet
+               '(begin
+                  ;; Remove bundled bzip2 and zlib
+                  (delete-file-recursively "c++/src/util/compress/bzip2")
+                  (delete-file-recursively "c++/src/util/compress/zlib")
+                  (substitute* "c++/src/util/compress/Makefile.in"
+                    (("bzip2 zlib api") "api"))
+                  ;; Remove useless msbuild directory
+                  (delete-file-recursively
+                   "c++/src/build-system/project_tree_builder/msbuild")))))
+    (build-system gnu-build-system)
+    (arguments
+     `(;; There are three(!) tests for this massive library, and all fail with
+       ;; "unparsable timing stats".
+       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
+       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
+       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
+       #:tests? #f
+       #:out-of-source? #t
+       #:parallel-build? #f ; not supported
+       #:phases
+       (modify-phases %standard-phases
+         (add-before
+          'configure 'set-HOME
+          ;; $HOME needs to be set at some point during the configure phase
+          (lambda _ (setenv "HOME" "/tmp") #t))
+         (add-after
+          'unpack 'enter-dir
+          (lambda _ (chdir "c++") #t))
+         (add-after
+          'enter-dir 'fix-build-system
+          (lambda _
+            (define (which* cmd)
+              (cond ((string=? cmd "date")
+                     ;; make call to "date" deterministic
+                     "date -d @0")
+                    ((which cmd)
+                     => identity)
+                    (else
+                     (format (current-error-port)
+                             "WARNING: Unable to find absolute path for ~s~%"
+                             cmd)
+                     #f)))
+
+            ;; Proceed even though the weird build system says that generated
+            ;; files are out of date
+            (setenv "NCBICXX_RECONF_POLICY" "warn")
+
+            ;; Rewrite hardcoded paths to various tools
+            (substitute* (append (find-files "scripts/common/check" "\\.sh$")
+                                 '("scripts/common/impl/if_diff.sh"
+                                   "scripts/common/impl/run_with_lock.sh"
+                                   "src/build-system/Makefile.configurables.real"
+                                   "src/build-system/Makefile.in.top"
+                                   "src/build-system/Makefile.meta.gmake=no"
+                                   "src/build-system/Makefile.meta.in"
+                                   "src/build-system/Makefile.meta_l"
+                                   "src/build-system/Makefile.meta_p"
+                                   "src/build-system/Makefile.meta_r"
+                                   "src/build-system/Makefile.mk.in"
+                                   "src/build-system/Makefile.requirements"
+                                   "src/build-system/Makefile.rules_with_autodep.in"
+                                   "src/build-system/configure"
+                                   "src/build-system/configure.ac"))
+              (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
+               (or (which* cmd) all)))
+
+            ;; Some of the files we're patching are
+            ;; ISO-8859-1-encoded, so choose it as the default
+            ;; encoding so the byte encoding is preserved.
+            (with-fluids ((%default-port-encoding #f))
+              (substitute* (find-files "src/build-system" "^config.*")
+                (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
+                (("^PATH=.*") "")))
+
+            ;; rewrite "/var/tmp" in check script
+            (substitute* "scripts/common/check/check_make_unix.sh"
+              (("/var/tmp") "/tmp"))
+
+            ;; do not reset PATH
+            (substitute* (find-files "scripts/common/impl/" "\\.sh$")
+              (("^ *PATH=.*") "")
+              (("action=/bin/") "action=")
+              (("export PATH") ":"))
+            #t))
+         (replace
+          'configure
+          (lambda* (#:key inputs outputs #:allow-other-keys)
+            (let ((out     (assoc-ref outputs "out"))
+                  (lib     (string-append (assoc-ref outputs "lib") "/lib"))
+                  (include (string-append (assoc-ref outputs "include")
+                                          "/include/ncbi-tools++")))
+              ;; The 'configure' script doesn't recognize things like
+              ;; '--enable-fast-install'.
+              (zero? (system* "./configure.orig"
+                              (string-append "--with-build-root=" (getcwd) "/build")
+                              (string-append "--prefix=" out)
+                              (string-append "--libdir=" lib)
+                              (string-append "--includedir=" include)
+                              (string-append "--with-bz2="
+                                             (assoc-ref inputs "bzip2"))
+                              (string-append "--with-z="
+                                             (assoc-ref inputs "zlib"))
+                              ;; Each library is built twice by default, once
+                              ;; with "-static" in its name, and again
+                              ;; without.
+                              "--without-static"
+                              "--with-dll"))))))))
+    (outputs '("out"       ;  19 MB
+               "lib"       ; 203 MB
+               "include")) ;  32 MB
+    (inputs
+     `(("bzip2" ,bzip2)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("cpio" ,cpio)))
+    (home-page "http://blast.ncbi.nlm.nih.gov")
+    (synopsis "Basic local alignment search tool")
+    (description
+     "BLAST is a popular method of performing a DNA or protein sequence
+similarity search, using heuristics to produce results quickly.  It also
+calculates an “expect value” that estimates how many matches would have
+occurred at a given score by chance, which can aid a user in judging how much
+confidence to have in an alignment.")
+    ;; Most of the sources are in the public domain, with the following
+    ;; exceptions:
+    ;;   * Expat:
+    ;;     * ./c++/include/util/bitset/
+    ;;     * ./c++/src/html/ncbi_menu*.js
+    ;;   * Boost license:
+    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
+    ;;   * LGPL 2+:
+    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
+    ;;   * ASL 2.0:
+    ;;     * ./c++/src/corelib/teamcity_*
+    (license (list license:public-domain
+                   license:expat
+                   license:boost1.0
+                   license:lgpl2.0+
+                   license:asl2.0))))
+
 (define-public bowtie
   (package
     (name "bowtie")
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-23  8:06   ` Ricardo Wurmus
@ 2015-06-27 10:14     ` Ludovic Courtès
  2015-06-27 13:10       ` Ben Woodcroft
  2015-06-27 18:29     ` Mark H Weaver
  1 sibling, 1 reply; 8+ messages in thread
From: Ludovic Courtès @ 2015-06-27 10:14 UTC (permalink / raw)
  To: Ricardo Wurmus; +Cc: guix-devel

Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de> skribis:

> From 8d131f66ba0378738e5b837f78c411edb241d35a Mon Sep 17 00:00:00 2001
> From: Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
> Date: Tue, 16 Jun 2015 16:24:24 +0200
> Subject: [PATCH] gnu: Add Blast+.
>
> * gnu/packages/bioinformatics.scm (blast+): New variable.

Mark, do you have anything to add on this one?  At first sight it looks
good to me.

Ludo’.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-27 10:14     ` Ludovic Courtès
@ 2015-06-27 13:10       ` Ben Woodcroft
  2015-06-27 14:09         ` Ricardo Wurmus
  0 siblings, 1 reply; 8+ messages in thread
From: Ben Woodcroft @ 2015-06-27 13:10 UTC (permalink / raw)
  To: Ludovic Courtès, Ricardo Wurmus; +Cc: guix-devel



On 27/06/15 20:14, Ludovic Courtès wrote:
> Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de> skribis:
>
>>  From 8d131f66ba0378738e5b837f78c411edb241d35a Mon Sep 17 00:00:00 2001
>> From: Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
>> Date: Tue, 16 Jun 2015 16:24:24 +0200
>> Subject: [PATCH] gnu: Add Blast+.
>>
>> * gnu/packages/bioinformatics.scm (blast+): New variable.
> Mark, do you have anything to add on this one?  At first sight it looks
> good to me.
>
2.2.31+ just got released, maybe upgrade first?

ben

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-27 13:10       ` Ben Woodcroft
@ 2015-06-27 14:09         ` Ricardo Wurmus
  0 siblings, 0 replies; 8+ messages in thread
From: Ricardo Wurmus @ 2015-06-27 14:09 UTC (permalink / raw)
  To: Ben Woodcroft; +Cc: guix-devel


Ben Woodcroft <b.woodcroft@uq.edu.au> writes:

> 2.2.31+ just got released, maybe upgrade first?

Ah, good to know.  I’ll build the latest version on Monday and push that
if there are no further objections.

~~ Ricardo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-23  8:06   ` Ricardo Wurmus
  2015-06-27 10:14     ` Ludovic Courtès
@ 2015-06-27 18:29     ` Mark H Weaver
  2015-06-29 14:15       ` Ricardo Wurmus
  1 sibling, 1 reply; 8+ messages in thread
From: Mark H Weaver @ 2015-06-27 18:29 UTC (permalink / raw)
  To: Ricardo Wurmus; +Cc: guix-devel

Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de> writes:

>> It would be great to get the tests working, even if we have to disable
>> some of them.  Otherwise we have no way of knowing that we're not
>> distributing broken garbage :)
>
> It's hard to know in this case even with the tests, because it's only
> three tests and they seem hardly representative of the library.

Okay, fair enough.

>> Is everything in here really in the public domain?  I'd guess that in
>> order to make this true, you'd need to remove bzip2 and zlib in a
>> snippet, and even then I'd be doubtful :)
>
> I've moved the code to remove the bundled stuff to a snippet.
>
> The NCBI code was released into the public domain.  However, it appears
> that some third-party headers and some build scripts are under
> different licenses:
>
>   * Expat:
>     * ncbi-blast-2.2.30+-src/c++/include/util/bitset/
>     * ncbi-blast-2.2.30+-src/c++/src/html/ncbi_menu*.js
>   * Boost license:
>     * ncbi-blast-2.2.30+-src/c++/include/util/impl/floating_point_comparison.hpp
>   * LGPL 2+:
>     * ncbi-blast-2.2.30+-src/c++/include/dbapi/driver/odbc/unix_odbc/
>   * ASL 2.0:
>     * ncbi-blast-2.2.30+-src/c++/src/corelib/teamcity_*
>
> I could not find mention of any other licenses.  Is this correct, then:
>
>     ;; Most of the sources are in the public domain, with the following exceptions:
>     ;; ...(the above list)...
>     (license (list license:public-domain
>                    license:expat
>                    license:boost1.0
>                    license:lgpl2.0+
>                    license:asl2.0))

Looks good, thanks!  See below for more comments on the patch.

> From 8d131f66ba0378738e5b837f78c411edb241d35a Mon Sep 17 00:00:00 2001
> From: Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
> Date: Tue, 16 Jun 2015 16:24:24 +0200
> Subject: [PATCH] gnu: Add Blast+.
>
> * gnu/packages/bioinformatics.scm (blast+): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 155 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 155 insertions(+)
>
> diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
> index 3defac8..4603329 100644
> --- a/gnu/packages/bioinformatics.scm
> +++ b/gnu/packages/bioinformatics.scm
> @@ -31,6 +31,7 @@
>    #:use-module (gnu packages base)
>    #:use-module (gnu packages boost)
>    #:use-module (gnu packages compression)
> +  #:use-module (gnu packages cpio)
>    #:use-module (gnu packages file)
>    #:use-module (gnu packages java)
>    #:use-module (gnu packages linux)
> @@ -258,6 +259,160 @@ into separate processes; and more.")
>      (inputs
>       `(("python2-numpy" ,python2-numpy)))))
>  
> +(define-public blast+
> +  (package
> +    (name "blast+")
> +    (version "2.2.30")
> +    (source (origin
> +              (method url-fetch)
> +              (uri (string-append
> +                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
> +                    version "/ncbi-blast-" version "+-src.tar.gz"))
> +              (sha256
> +               (base32
> +                "0h0fj5cpx6zpfwixgx5f5xbr4rn3cnai0x3j7grrg50vr18jvxr6"))
> +              (modules '((guix build utils)))
> +              (snippet
> +               '(begin
> +                  ;; Remove bundled bzip2 and zlib
> +                  (delete-file-recursively "c++/src/util/compress/bzip2")
> +                  (delete-file-recursively "c++/src/util/compress/zlib")
> +                  (substitute* "c++/src/util/compress/Makefile.in"
> +                    (("bzip2 zlib api") "api"))
> +                  ;; Remove useless msbuild directory
> +                  (delete-file-recursively
> +                   "c++/src/build-system/project_tree_builder/msbuild")))))

As with phase procedures, the snippet should return a boolean, so please
add #t here.

[...]
> +            ;; Proceed even though the weird build system says that generated
> +            ;; files are out of date
> +            (setenv "NCBICXX_RECONF_POLICY" "warn")
> +
> +            ;; Rewrite hardcoded paths to various tools
> +            (substitute* (append (find-files "scripts/common/check" "\\.sh$")
> +                                 '("scripts/common/impl/if_diff.sh"
> +                                   "scripts/common/impl/run_with_lock.sh"
> +                                   "src/build-system/Makefile.configurables.real"
> +                                   "src/build-system/Makefile.in.top"
> +                                   "src/build-system/Makefile.meta.gmake=no"
> +                                   "src/build-system/Makefile.meta.in"
> +                                   "src/build-system/Makefile.meta_l"
> +                                   "src/build-system/Makefile.meta_p"
> +                                   "src/build-system/Makefile.meta_r"
> +                                   "src/build-system/Makefile.mk.in"
> +                                   "src/build-system/Makefile.requirements"
> +                                   "src/build-system/Makefile.rules_with_autodep.in"
> +                                   "src/build-system/configure"
> +                                   "src/build-system/configure.ac"))
> +              (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
> +               (or (which* cmd) all)))

It would be better to patch configure.ac first, and then configure, and
then the others.  As you have it here, the modification times will
indicate that 'configure' is older than 'configure.ac' and thus out of
date.  Perhaps this is why you needed to setenv NCBICXX_RECONF_POLICY?

Otherwise, the patch looks great, thanks!

     Mark

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add Blast+.
  2015-06-27 18:29     ` Mark H Weaver
@ 2015-06-29 14:15       ` Ricardo Wurmus
  0 siblings, 0 replies; 8+ messages in thread
From: Ricardo Wurmus @ 2015-06-29 14:15 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guix-devel


Mark H Weaver <mhw@netris.org> writes:

>> +              (snippet
>> +               '(begin
>> +                  ;; Remove bundled bzip2 and zlib
>> +                  (delete-file-recursively "c++/src/util/compress/bzip2")
>> +                  (delete-file-recursively "c++/src/util/compress/zlib")
>> +                  (substitute* "c++/src/util/compress/Makefile.in"
>> +                    (("bzip2 zlib api") "api"))
>> +                  ;; Remove useless msbuild directory
>> +                  (delete-file-recursively
>> +                   "c++/src/build-system/project_tree_builder/msbuild")))))
>
> As with phase procedures, the snippet should return a boolean, so please
> add #t here.

Done.

>> +            ;; Proceed even though the weird build system says that generated
>> +            ;; files are out of date
>> +            (setenv "NCBICXX_RECONF_POLICY" "warn")
>> +
>> +            ;; Rewrite hardcoded paths to various tools
>> +            (substitute* (append (find-files "scripts/common/check" "\\.sh$")
>> +                                 '("scripts/common/impl/if_diff.sh"
>> +                                   "scripts/common/impl/run_with_lock.sh"
>> +                                   "src/build-system/Makefile.configurables.real"
>> +                                   "src/build-system/Makefile.in.top"
>> +                                   "src/build-system/Makefile.meta.gmake=no"
>> +                                   "src/build-system/Makefile.meta.in"
>> +                                   "src/build-system/Makefile.meta_l"
>> +                                   "src/build-system/Makefile.meta_p"
>> +                                   "src/build-system/Makefile.meta_r"
>> +                                   "src/build-system/Makefile.mk.in"
>> +                                   "src/build-system/Makefile.requirements"
>> +                                   "src/build-system/Makefile.rules_with_autodep.in"
>> +                                   "src/build-system/configure"
>> +                                   "src/build-system/configure.ac"))
>> +              (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
>> +               (or (which* cmd) all)))
>
> It would be better to patch configure.ac first, and then configure, and
> then the others.  As you have it here, the modification times will
> indicate that 'configure' is older than 'configure.ac' and thus out of
> date.  Perhaps this is why you needed to setenv NCBICXX_RECONF_POLICY?

You were right.  I reordered the patching and could remove the
variable.  Thank you for the review and sharing your insights!

I updated to Blast 2.2.31, built successfully and just pushed.

~~ Ricardo

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-06-29 14:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-16 14:26 [PATCH] Add Blast+ Ricardo Wurmus
2015-06-16 21:37 ` Mark H Weaver
2015-06-23  8:06   ` Ricardo Wurmus
2015-06-27 10:14     ` Ludovic Courtès
2015-06-27 13:10       ` Ben Woodcroft
2015-06-27 14:09         ` Ricardo Wurmus
2015-06-27 18:29     ` Mark H Weaver
2015-06-29 14:15       ` Ricardo Wurmus

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).