From 3d34e82ee67f5ee0b226de350f40d7f881169a56 Mon Sep 17 00:00:00 2001
From: Roel Janssen
Date: Wed, 21 Apr 2021 14:24:07 +0200
Subject: [PATCH] gnu: Add pbgzip.

* gnu/packages/bioinformatics.scm (pbgzip): New variable.
---
 gnu/packages/bioinformatics.scm | 47 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 31205c473a..35601378c2 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -3,7 +3,7 @@
 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft
 ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins
 ;;; Copyright © 2015 Andreas Enge
-;;; Copyright © 2016, 2020 Roel Janssen
+;;; Copyright © 2016, 2020, 2021 Roel Janssen
 ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner
 ;;; Copyright © 2016, 2020 Marius Bakke
 ;;; Copyright © 2016, 2018 Raoul Bonnal
@@ -569,6 +569,51 @@ input and output BAMs must adhere to the PacBio BAM format specification.
 Non-PacBio BAMs will cause exceptions to be thrown.")
     (license license:bsd-3)))
 
+(define-public pbgzip
+  ;; NOTE(review): "0.0.0" is a placeholder base version; replace it with
+  ;; upstream's latest release number if one exists.
+  (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")
+        (revision "1"))
+    (package
+      (name "pbgzip")
+      (version (git-version "0.0.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/nh13/pbgzip")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           (add-after 'unpack 'autogen
+             (lambda _
+               ;; 'invoke' raises an error on a non-zero exit status, so a
+               ;; failing autogen.sh aborts the build.
+               (invoke "sh" "autogen.sh"))))))
+      (native-inputs
+       `(("autoconf" ,autoconf)
+         ("automake" ,automake)))
+      (inputs
+       `(("zlib" ,zlib)))
+      (home-page "https://github.com/nh13/pbgzip")
+      (synopsis "Parallel Block GZIP")
+      (description "This package implements parallel block gzip.  For many
+formats, in particular genomics data formats, data are compressed in
+fixed-length blocks such that they can be easily indexed based on a (genomic)
+coordinate order, since typically each block is sorted according to this order.
+This allows for each block to be individually compressed (deflated), or more
+importantly, decompressed (inflated), with the latter enabling random retrieval
+of data in large files (gigabytes to terabytes).  @code{pbgzip} is not limited
+to any particular format, but certain features are tailored to genomics data
+formats when enabled.  Parallel decompression is somewhat faster, but truly the
+speedup comes during compression.")
+      (license license:expat))))
+
 (define-public blasr-libcpp
   (package
     (name "blasr-libcpp")
-- 
2.31.1