unofficial mirror of guix-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] Add fraggenescan.
@ 2015-12-20 12:36 Ben Woodcroft
  2015-12-20 13:03 ` Ricardo Wurmus
  0 siblings, 1 reply; 8+ messages in thread
From: Ben Woodcroft @ 2015-12-20 12:36 UTC (permalink / raw)
  To: guix-devel@gnu.org

[-- Attachment #1: Type: text/plain, Size: 39 bytes --]

Turned out to be a little messy. TIA..

[-- Attachment #2: 0001-gnu-Add-fraggenescan.patch --]
[-- Type: text/x-patch, Size: 4716 bytes --]

From f10c4178cef360a8472c988c9ea8aace15daa954 Mon Sep 17 00:00:00 2001
From: Ben Woodcroft <donttrustben@gmail.com>
Date: Sun, 20 Dec 2015 22:23:17 +1000
Subject: [PATCH] gnu: Add fraggenescan.

* gnu/packages/bioinformatics.scm (fraggenescan): New variable.
---
 gnu/packages/bioinformatics.scm | 80 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 7c573e1..3e4e05b 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -1354,6 +1354,86 @@ supports next-generation sequencing data in fasta/q and csfasta/q format from
 Illumina, Roche 454, and the SOLiD platform.")
     (license license:gpl3)))
 
+(define-public fraggenescan
+  (package
+    (name "fraggenescan")
+    (version "1.20")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "mirror://sourceforge/fraggenescan/"
+                       "FragGeneScan" version ".tar.gz"))
+       (sha256
+        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         (add-before 'build 'patch-run-script
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (string-append (assoc-ref outputs "out")))
+                    (share (string-append out "/share/fraggenescan/")))
+               (substitute* "run_FragGeneScan.pl"
+                 (("system\\(\"rm")
+                  (string-append "system(\"" (which "rm")))
+                 (("system\\(\"mv")
+                  (string-append "system(\"" (which "mv")))
+                 ;; This script and other programs expect the training files
+                 ;; to be in the non-standard location bin/train/XXX. Change
+                 ;; this to be share/fraggenescan/train/XXX instead.
+                 (("^\\$train.file = \\$dir.\\\"train/\\\".\\$FGS_train_file;")
+                  (string-append "$train_file = \""
+                                 share
+                                 "train/\".$FGS_train_file;")))
+               (substitute* "run_hmm.c"
+                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
+                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))
+               (substitute* "post_process.pl"
+                 (("^my \\$dir = substr\\(\\$0, 0, length\\(\\$0)-15\\);")
+                  (string-append "my $dir = \"" share "\";"))))
+             #t))
+         (replace 'build
+           (lambda _ (and (zero? (system* "make" "clean"))
+                          (zero? (system* "make" "fgs")))))
+         (delete 'check)
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (string-append (assoc-ref outputs "out")))
+                    (bin (string-append out "/bin/"))
+                    (share (string-append out "/share/fraggenescan/train")))
+               (install-file "run_FragGeneScan.pl" bin)
+               (install-file "FragGeneScan" bin)
+               (install-file "FGS_gff.py" bin)
+               (install-file "post_process.pl" bin)
+               (copy-recursively "train" share))))
+         (add-after 'install 'post-install-check
+           ;; In lieu of 'make check', run one of the examples and check the
+           ;; output files gets created.
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (string-append (assoc-ref outputs "out")))
+                    (bin (string-append out "/bin/")))
+               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
+                             "-genome=./example/NC_000913.fna"
+                             "-out=./test2"
+                             "-complete=1"
+                             "-train=complete"))
+                    (file-exists? "test2.faa")
+                    (file-exists? "test2.ffn")
+                    (file-exists? "test2.gff")
+                    (file-exists? "test2.out"))))))))
+    (inputs
+     `(("perl" ,perl)
+       ("python" ,python-2))) ;not compatible with python 3.
+    (home-page "https://sourceforge.net/projects/fraggenescan/")
+    (synopsis "Finds potentially fragmented genes in short reads")
+    (description
+     "FragGeneScan is a program for predicting bacterial and archaeal genes in
+short and error-prone DNA sequencing reads.  It can also be applied to predict
+genes in incomplete assemblies or complete genomes.")
+    (license license:gpl1)))
+
 (define-public grit
   (package
     (name "grit")
-- 
2.5.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-20 12:36 [PATCH] Add fraggenescan Ben Woodcroft
@ 2015-12-20 13:03 ` Ricardo Wurmus
  2015-12-27 21:53   ` Ben Woodcroft
  0 siblings, 1 reply; 8+ messages in thread
From: Ricardo Wurmus @ 2015-12-20 13:03 UTC (permalink / raw)
  To: Ben Woodcroft; +Cc: guix-devel@gnu.org

Hi Ben,

thanks for your patch!

> From f10c4178cef360a8472c988c9ea8aace15daa954 Mon Sep 17 00:00:00 2001
> From: Ben Woodcroft <donttrustben@gmail.com>
> Date: Sun, 20 Dec 2015 22:23:17 +1000
> Subject: [PATCH] gnu: Add fraggenescan.

> * gnu/packages/bioinformatics.scm (fraggenescan): New variable.
> ---
>  gnu/packages/bioinformatics.scm | 80 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 80 insertions(+)

[...]

> +(define-public fraggenescan
> +  (package
> +    (name "fraggenescan")
> +    (version "1.20")
> +    (source
> +     (origin
> +       (method url-fetch)
> +       (uri
> +        (string-append "mirror://sourceforge/fraggenescan/"
> +                       "FragGeneScan" version ".tar.gz"))
> +       (sha256
> +        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
> +    (build-system gnu-build-system)
> +    (arguments
> +     `(#:phases
> +       (modify-phases %standard-phases
> +         (delete 'configure)
> +         (add-before 'build 'patch-run-script

This phase does more than just patch the run script (it changes paths in
two scripts and a C source file).  Could you find a better name?

> +           (lambda* (#:key outputs #:allow-other-keys)
> +             (let* ((out (string-append (assoc-ref outputs "out")))
> +                    (share (string-append out "/share/fraggenescan/")))
> +               (substitute* "run_FragGeneScan.pl"
> +                 (("system\\(\"rm")
> +                  (string-append "system(\"" (which "rm")))
> +                 (("system\\(\"mv")
> +                  (string-append "system(\"" (which "mv")))
> +                 ;; This script and other programs expect the training files
> +                 ;; to be in the non-standard location bin/train/XXX. Change
> +                 ;; this to be share/fraggenescan/train/XXX instead.
> +                 (("^\\$train.file = \\$dir.\\\"train/\\\".\\$FGS_train_file;")
> +                  (string-append "$train_file = \""
> +                                 share
> +                                 "train/\".$FGS_train_file;")))

I might look clearer if you captured a part of the matches,

(("(^\\$train\\.file = )\\$dir\\.\\\"(train/\\\"\\.\\$FGS_train_file;)" _ pre post)
 (string-append pre "\"" share post))

Or since there’s really just one line beginning with “$train.file” maybe
you could do this:

(("(^\\$train\\.file = ).*" _ prefix)
 (string-append prefix "\"" share "train/\".$FGS_train_file;"))

The regular expressions above look quite scary, so maybe the latter
proposal is best here.

> +               (substitute* "run_hmm.c"
> +                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
> +                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))

Why do you replace “strcat” with “strcpy” here?

> +               (substitute* "post_process.pl"
> +                 (("^my \\$dir = substr\\(\\$0, 0, length\\(\\$0)-15\\);")
> +                  (string-append "my $dir = \"" share "\";"))))

I would also suggest simplifying the regular expression here.  There is
only one line matching “^my \\$dir =”.

> +             #t))
> +         (replace 'build
> +           (lambda _ (and (zero? (system* "make" "clean"))
> +                          (zero? (system* "make" "fgs")))))

Why must “make clean” be run first?  I know the README says so, but is
it really required?  If it is not you could just use the default build
phase, possibly specifying “fgs” as the target.

> +         (delete 'check)

How about “#:tests? #f” instead?

> +         (replace 'install
> +           (lambda* (#:key outputs #:allow-other-keys)
> +             (let* ((out (string-append (assoc-ref outputs "out")))
> +                    (bin (string-append out "/bin/"))
> +                    (share (string-append out "/share/fraggenescan/train")))
> +               (install-file "run_FragGeneScan.pl" bin)
> +               (install-file "FragGeneScan" bin)
> +               (install-file "FGS_gff.py" bin)
> +               (install-file "post_process.pl" bin)
> +               (copy-recursively "train" share))))
> +         (add-after 'install 'post-install-check
> +           ;; In lieu of 'make check', run one of the examples and check the
> +           ;; output files gets created.

Oh, I see.  Maybe you could delete the “check” phase right before this,
so that it’s obvious you are moving it after the “install” phase.

> +           (lambda* (#:key outputs #:allow-other-keys)
> +             (let* ((out (string-append (assoc-ref outputs "out")))
> +                    (bin (string-append out "/bin/")))
> +               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
> +                             "-genome=./example/NC_000913.fna"
> +                             "-out=./test2"
> +                             "-complete=1"
> +                             "-train=complete"))
> +                    (file-exists? "test2.faa")
> +                    (file-exists? "test2.ffn")
> +                    (file-exists? "test2.gff")
> +                    (file-exists? "test2.out"))))))))
> +    (inputs
> +     `(("perl" ,perl)
> +       ("python" ,python-2))) ;not compatible with python 3.
> +    (home-page "https://sourceforge.net/projects/fraggenescan/")
> +    (synopsis "Finds potentially fragmented genes in short reads")
> +    (description
> +     "FragGeneScan is a program for predicting bacterial and archaeal genes in
> +short and error-prone DNA sequencing reads.  It can also be applied to predict
> +genes in incomplete assemblies or complete genomes.")
> +    (license license:gpl1)))

I didn’t see any mention of a particular GPL version.  The README says
this:

    License
    ============
    Copyright (C) 2010 Mina Rho, Yuzhen Ye and Haixu Tang.
    You may redistribute this software under the terms of the GNU General Public License.

This looks like it could be any version of the GPL (as it is implied
when R packages just declare “GPL” as a license).  I would do this, but
I’m not sure that’s okay:

    ;; Released under any version of the GPL
    (license license:gpl3+)

~~ Ricardo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-20 13:03 ` Ricardo Wurmus
@ 2015-12-27 21:53   ` Ben Woodcroft
  2015-12-28  0:38     ` Leo Famulari
  0 siblings, 1 reply; 8+ messages in thread
From: Ben Woodcroft @ 2015-12-27 21:53 UTC (permalink / raw)
  To: Ricardo Wurmus; +Cc: guix-devel@gnu.org

[-- Attachment #1: Type: text/plain, Size: 6568 bytes --]

Heya,

On 20/12/15 23:03, Ricardo Wurmus wrote:
> thanks for your patch!
and thank you.
>> +(define-public fraggenescan
>> +  (package
>> +    (name "fraggenescan")
>> +    (version "1.20")
>> +    (source
>> +     (origin
>> +       (method url-fetch)
>> +       (uri
>> +        (string-append "mirror://sourceforge/fraggenescan/"
>> +                       "FragGeneScan" version ".tar.gz"))
>> +       (sha256
>> +        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
>> +    (build-system gnu-build-system)
>> +    (arguments
>> +     `(#:phases
>> +       (modify-phases %standard-phases
>> +         (delete 'configure)
>> +         (add-before 'build 'patch-run-script
> This phase does more than just patch the run script (it changes paths in
> two scripts and a C source file).  Could you find a better name?
ok, done.
>> +           (lambda* (#:key outputs #:allow-other-keys)
>> +             (let* ((out (string-append (assoc-ref outputs "out")))
>> +                    (share (string-append out "/share/fraggenescan/")))
>> +               (substitute* "run_FragGeneScan.pl"
>> +                 (("system\\(\"rm")
>> +                  (string-append "system(\"" (which "rm")))
>> +                 (("system\\(\"mv")
>> +                  (string-append "system(\"" (which "mv")))
>> +                 ;; This script and other programs expect the training files
>> +                 ;; to be in the non-standard location bin/train/XXX. Change
>> +                 ;; this to be share/fraggenescan/train/XXX instead.
>> +                 (("^\\$train.file = \\$dir.\\\"train/\\\".\\$FGS_train_file;")
>> +                  (string-append "$train_file = \""
>> +                                 share
>> +                                 "train/\".$FGS_train_file;")))
> I might look clearer if you captured a part of the matches,
>
> (("(^\\$train\\.file = )\\$dir\\.\\\"(train/\\\"\\.\\$FGS_train_file;)" _ pre post)
>   (string-append pre "\"" share post))
>
> Or since there’s really just one line beginning with “$train.file” maybe
> you could do this:
>
> (("(^\\$train\\.file = ).*" _ prefix)
>   (string-append prefix "\"" share "train/\".$FGS_train_file;"))
>
> The regular expressions above look quite scary, so maybe the latter
> proposal is best here.
Indeed they are scary, and writing regexes for Guix rarely fails to trip 
me up. I'd actually prefer a less powerful alternative (just a regular 
string replacement), and better yet one that stops the build process 
when it matches 0 or 2 or more times. There's nothing like that around 
is there?

But for now I just took your suggestions about shortening the regexes.
>
>> +               (substitute* "run_hmm.c"
>> +                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
>> +                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))
> Why do you replace “strcat” with “strcpy” here?
The line above does a strcpy we don't want, so strcat would keep that. I 
could remove the line above if you want, but this effectively makes no 
difference to the running of the program.

[..]
>> +             #t))
>> +         (replace 'build
>> +           (lambda _ (and (zero? (system* "make" "clean"))
>> +                          (zero? (system* "make" "fgs")))))
> Why must “make clean” be run first?  I know the README says so, but is
> it really required?  If it is not you could just use the default build
> phase, possibly specifying “fgs” as the target.
Yeh the tarball comes with compiled files. I've added a comment.
>
>> +         (delete 'check)
> How about “#:tests? #f” instead?
>
>> +         (replace 'install
>> +           (lambda* (#:key outputs #:allow-other-keys)
>> +             (let* ((out (string-append (assoc-ref outputs "out")))
>> +                    (bin (string-append out "/bin/"))
>> +                    (share (string-append out "/share/fraggenescan/train")))
>> +               (install-file "run_FragGeneScan.pl" bin)
>> +               (install-file "FragGeneScan" bin)
>> +               (install-file "FGS_gff.py" bin)
>> +               (install-file "post_process.pl" bin)
>> +               (copy-recursively "train" share))))
>> +         (add-after 'install 'post-install-check
>> +           ;; In lieu of 'make check', run one of the examples and check the
>> +           ;; output files gets created.
> Oh, I see.  Maybe you could delete the “check” phase right before this,
> so that it’s obvious you are moving it after the “install” phase.
ok, done.
>
>> +           (lambda* (#:key outputs #:allow-other-keys)
>> +             (let* ((out (string-append (assoc-ref outputs "out")))
>> +                    (bin (string-append out "/bin/")))
>> +               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
>> +                             "-genome=./example/NC_000913.fna"
>> +                             "-out=./test2"
>> +                             "-complete=1"
>> +                             "-train=complete"))
>> +                    (file-exists? "test2.faa")
>> +                    (file-exists? "test2.ffn")
>> +                    (file-exists? "test2.gff")
>> +                    (file-exists? "test2.out"))))))))
>> +    (inputs
>> +     `(("perl" ,perl)
>> +       ("python" ,python-2))) ;not compatible with python 3.
>> +    (home-page "https://sourceforge.net/projects/fraggenescan/")
>> +    (synopsis "Finds potentially fragmented genes in short reads")
>> +    (description
>> +     "FragGeneScan is a program for predicting bacterial and archaeal genes in
>> +short and error-prone DNA sequencing reads.  It can also be applied to predict
>> +genes in incomplete assemblies or complete genomes.")
>> +    (license license:gpl1)))
> I didn’t see any mention of a particular GPL version.  The README says
> this:
>
>      License
>      ============
>      Copyright (C) 2010 Mina Rho, Yuzhen Ye and Haixu Tang.
>      You may redistribute this software under the terms of the GNU General Public License.
>
> This looks like it could be any version of the GPL (as it is implied
> when R packages just declare “GPL” as a license).  I would do this, but
> I’m not sure that’s okay:
>
>      ;; Released under any version of the GPL
>      (license license:gpl3+)
It seems your interpretation was better than mine. The authors said 
gpl3+ over email.

Thanks,
ben

[-- Attachment #2: 0001-gnu-Add-fraggenescan.patch --]
[-- Type: text/x-patch, Size: 4717 bytes --]

From a140b0816f095a7a13d6bdd4dcbddbae1d020fbb Mon Sep 17 00:00:00 2001
From: Ben Woodcroft <donttrustben@gmail.com>
Date: Sun, 20 Dec 2015 22:23:17 +1000
Subject: [PATCH] gnu: Add fraggenescan.

* gnu/packages/bioinformatics.scm (fraggenescan): New variable.
---
 gnu/packages/bioinformatics.scm | 81 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 7c573e1..afa32f2 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -1354,6 +1354,87 @@ supports next-generation sequencing data in fasta/q and csfasta/q format from
 Illumina, Roche 454, and the SOLiD platform.")
     (license license:gpl3)))
 
+(define-public fraggenescan
+  (package
+    (name "fraggenescan")
+    (version "1.20")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "mirror://sourceforge/fraggenescan/"
+                       "FragGeneScan" version ".tar.gz"))
+       (sha256
+        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (delete 'configure)
+         (add-before 'build 'patch-paths
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (string-append (assoc-ref outputs "out")))
+                    (share (string-append out "/share/fraggenescan/")))
+               (substitute* "run_FragGeneScan.pl"
+                 (("system\\(\"rm")
+                  (string-append "system(\"" (which "rm")))
+                 (("system\\(\"mv")
+                  (string-append "system(\"" (which "mv")))
+                 ;; This script and other programs expect the training files
+                 ;; to be in the non-standard location bin/train/XXX. Change
+                 ;; this to be share/fraggenescan/train/XXX instead.
+                 (("^\\$train.file = \\$dir.*")
+                  (string-append "$train_file = \""
+                                 share
+                                 "train/\".$FGS_train_file;")))
+               (substitute* "run_hmm.c"
+                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
+                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))
+               (substitute* "post_process.pl"
+                 (("^my \\$dir = substr.*")
+                  (string-append "my $dir = \"" share "\";"))))
+             #t))
+         (replace 'build
+           (lambda _ (and (zero? (system* "make" "clean"))
+                          (zero? (system* "make" "fgs")))))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (string-append (assoc-ref outputs "out")))
+                    (bin (string-append out "/bin/"))
+                    (share (string-append out "/share/fraggenescan/train")))
+               (install-file "run_FragGeneScan.pl" bin)
+               (install-file "FragGeneScan" bin)
+               (install-file "FGS_gff.py" bin)
+               (install-file "post_process.pl" bin)
+               (copy-recursively "train" share))))
+         (delete 'check)
+         (add-after 'install 'post-install-check
+           ;; In lieu of 'make check', run one of the examples and check the
+           ;; output files gets created.
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (string-append (assoc-ref outputs "out")))
+                    (bin (string-append out "/bin/")))
+               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
+                             "-genome=./example/NC_000913.fna"
+                             "-out=./test2"
+                             "-complete=1"
+                             "-train=complete"))
+                    (file-exists? "test2.faa")
+                    (file-exists? "test2.ffn")
+                    (file-exists? "test2.gff")
+                    (file-exists? "test2.out"))))))))
+    (inputs
+     `(("perl" ,perl)
+       ("python" ,python-2))) ;not compatible with python 3.
+    (home-page "https://sourceforge.net/projects/fraggenescan/")
+    (synopsis "Finds potentially fragmented genes in short reads")
+    (description
+     "FragGeneScan is a program for predicting bacterial and archaeal genes in
+short and error-prone DNA sequencing reads.  It can also be applied to predict
+genes in incomplete assemblies or complete genomes.")
+    ;; GPL3+ according to private correspondense with the authors.
+    (license license:gpl3+)))
+
 (define-public grit
   (package
     (name "grit")
-- 
2.5.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-27 21:53   ` Ben Woodcroft
@ 2015-12-28  0:38     ` Leo Famulari
  2015-12-28  0:56       ` Ben Woodcroft
  0 siblings, 1 reply; 8+ messages in thread
From: Leo Famulari @ 2015-12-28  0:38 UTC (permalink / raw)
  To: Ben Woodcroft; +Cc: guix-devel@gnu.org

On Mon, Dec 28, 2015 at 07:53:27AM +1000, Ben Woodcroft wrote:
> On 20/12/15 23:03, Ricardo Wurmus wrote:

[...]

> >>+               (substitute* "run_hmm.c"
> >>+                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
> >>+                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))
> >Why do you replace “strcat” with “strcpy” here?
> The line above does a strcpy we don't want, so strcat would keep that. I
> could remove the line above if you want, but this effectively makes no
> difference to the running of the program.

It looks like this substition has two parts:
1) Provide the correct path to some resources ("train files") for the
program.
2) Change a function call from strcat() to strcpy().

Can you elaborate more on part 2?

I undertand that the intended effect is to discard a value assigned to
'train_dir' [0] in the previous function call. This discarded value is
'argv[0]' minus 12 bytes [1]. The reason for this is that the program is
looking for the "train files" in the same directory as 'argv[0]', but we
have installed the "train files" somewhere else. Is that right?

You mention in a comment in the patch that this program and other
programs expect the "train files" to be in one place, but we are
patching this line to account for the fact that we have installed them
in another location.  Will the other programs have to be patched as
well? If so, do you think it would be better to let the files be
installed in the unusual location chosen by upstream?

[0] 'train_dir' is something like the --prefix parameter in a ./configure
script. Many paths are generated based on it, and they all seem to be
assigned to buffers with a length of 4096 bytes.

[1] The magic number 12 is equal to the length of the basename of
argv[0], "fraggenescan". For the benefit of those reading along...

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-28  0:38     ` Leo Famulari
@ 2015-12-28  0:56       ` Ben Woodcroft
  2015-12-28  5:43         ` Leo Famulari
  0 siblings, 1 reply; 8+ messages in thread
From: Ben Woodcroft @ 2015-12-28  0:56 UTC (permalink / raw)
  To: Leo Famulari; +Cc: guix-devel@gnu.org



On 28/12/15 10:38, Leo Famulari wrote:
> On Mon, Dec 28, 2015 at 07:53:27AM +1000, Ben Woodcroft wrote:
>> On 20/12/15 23:03, Ricardo Wurmus wrote:
> [...]
>
>>>> +               (substitute* "run_hmm.c"
>>>> +                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
>>>> +                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))
>>> Why do you replace “strcat” with “strcpy” here?
>> The line above does a strcpy we don't want, so strcat would keep that. I
>> could remove the line above if you want, but this effectively makes no
>> difference to the running of the program.
> It looks like this substition has two parts:
> 1) Provide the correct path to some resources ("train files") for the
> program.
> 2) Change a function call from strcat() to strcpy().
>
> Can you elaborate more on part 2?
>
> I undertand that the intended effect is to discard a value assigned to
> 'train_dir' [0] in the previous function call. This discarded value is
> 'argv[0]' minus 12 bytes [1]. The reason for this is that the program is
> looking for the "train files" in the same directory as 'argv[0]', but we
> have installed the "train files" somewhere else. Is that right?
Yes that is right. As is often the case for bioinformatics programs, it 
isn't intended to be 'installed', you are just expected to add the 
directory where compilation happens to your $PATH. So they don't mind 
stretching the conventions.
> You mention in a comment in the patch that this program and other
> programs expect the "train files" to be in one place, but we are
> patching this line to account for the fact that we have installed them
> in another location.  Will the other programs have to be patched as
> well?
Yep, and I believe I have done so. Did I miss something?

If you mean other programs that use fraggenescan, they would most likely 
only call "run_FragGeneScan.pl" so I don't see an issue there.
> If so, do you think it would be better to let the files be
> installed in the unusual location chosen by upstream?
Not after the work I did trying to do the 'right' thing.. I would 
imagine this program will not likely be updated with much, and if there 
was major updates I would guess that they would require substantive 
changes to the package definition anyway.

[..]

Thanks for the thoughts. I hope there wasn't too much gut feeling in my 
reply.
ben

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-28  0:56       ` Ben Woodcroft
@ 2015-12-28  5:43         ` Leo Famulari
  2015-12-29 23:23           ` Ludovic Courtès
  0 siblings, 1 reply; 8+ messages in thread
From: Leo Famulari @ 2015-12-28  5:43 UTC (permalink / raw)
  To: Ben Woodcroft; +Cc: guix-devel@gnu.org

On Mon, Dec 28, 2015 at 10:56:51AM +1000, Ben Woodcroft wrote:
> 
> 
> On 28/12/15 10:38, Leo Famulari wrote:
> >On Mon, Dec 28, 2015 at 07:53:27AM +1000, Ben Woodcroft wrote:
> >>On 20/12/15 23:03, Ricardo Wurmus wrote:
> >[...]
> >
> >>>>+               (substitute* "run_hmm.c"
> >>>>+                 (("^  strcat\\(train_dir, \\\"train/\\\"\\);")
> >>>>+                  (string-append "  strcpy(train_dir, \"" share "/train/\");")))
> >>>Why do you replace “strcat” with “strcpy” here?
> >>The line above does a strcpy we don't want, so strcat would keep that. I
> >>could remove the line above if you want, but this effectively makes no
> >>difference to the running of the program.
> >It looks like this substition has two parts:
> >1) Provide the correct path to some resources ("train files") for the
> >program.
> >2) Change a function call from strcat() to strcpy().
> >
> >Can you elaborate more on part 2?
> >
> >I undertand that the intended effect is to discard a value assigned to
> >'train_dir' [0] in the previous function call. This discarded value is
> >'argv[0]' minus 12 bytes [1]. The reason for this is that the program is
> >looking for the "train files" in the same directory as 'argv[0]', but we
> >have installed the "train files" somewhere else. Is that right?
> Yes that is right. As is often the case for bioinformatics programs, it
> isn't intended to be 'installed', you are just expected to add the directory
> where compilation happens to your $PATH. So they don't mind stretching the
> conventions.

Sounds like a good use case for `guix environment`.

Okay, I just wanted to make sure I understood correctly. Can't be too
careful with strings in C!

> >You mention in a comment in the patch that this program and other
> >programs expect the "train files" to be in one place, but we are
> >patching this line to account for the fact that we have installed them
> >in another location.  Will the other programs have to be patched as
> >well?
> Yep, and I believe I have done so. Did I miss something?
> 
> If you mean other programs that use fraggenescan, they would most likely
> only call "run_FragGeneScan.pl" so I don't see an issue there.

Okay.

> >If so, do you think it would be better to let the files be
> >installed in the unusual location chosen by upstream?
> Not after the work I did trying to do the 'right' thing.. I would imagine
> this program will not likely be updated with much, and if there was major
> updates I would guess that they would require substantive changes to the
> package definition anyway.

Fair enough, I just wanted to ask since I'm not familiar with this
program or the ones that will be interacting with it.

> 
> [..]
> 
> Thanks for the thoughts. I hope there wasn't too much gut feeling in my
> reply.

Not at all! Thanks for wrangling all those backslashes!

> ben

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-28  5:43         ` Leo Famulari
@ 2015-12-29 23:23           ` Ludovic Courtès
  2015-12-30  1:19             ` Leo Famulari
  0 siblings, 1 reply; 8+ messages in thread
From: Ludovic Courtès @ 2015-12-29 23:23 UTC (permalink / raw)
  To: Leo Famulari; +Cc: guix-devel@gnu.org

Leo, feel free to apply the version that looks good to you.

Ludo’.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Add fraggenescan.
  2015-12-29 23:23           ` Ludovic Courtès
@ 2015-12-30  1:19             ` Leo Famulari
  0 siblings, 0 replies; 8+ messages in thread
From: Leo Famulari @ 2015-12-30  1:19 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guix-devel@gnu.org

On Wed, Dec 30, 2015 at 12:23:31AM +0100, Ludovic Courtès wrote:
> Leo, feel free to apply the version that looks good to you.

I pushed the 2nd version (integrating Ricardo's suggestions) as
19f4554c94.

Thanks, Ben!

> 
> Ludo’.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-12-30  1:19 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-20 12:36 [PATCH] Add fraggenescan Ben Woodcroft
2015-12-20 13:03 ` Ricardo Wurmus
2015-12-27 21:53   ` Ben Woodcroft
2015-12-28  0:38     ` Leo Famulari
2015-12-28  0:56       ` Ben Woodcroft
2015-12-28  5:43         ` Leo Famulari
2015-12-29 23:23           ` Ludovic Courtès
2015-12-30  1:19             ` Leo Famulari

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).