diff mbox

[3/3] gnu: Add khmer.

Message ID 1466126601-30932-4-git-send-email-donttrustben@gmail.com
State New
Headers show

Commit Message

Ben Woodcroft June 17, 2016, 1:23 a.m. UTC
* gnu/packages/bioinformatics.scm (khmer): New variable.
* gnu/packages/patches/khmer-use-libraries.patch: New file.
* gnu/local.mk (dist_patch_DATA): Add it.
---
 gnu/local.mk                                   |  1 +
 gnu/packages/bioinformatics.scm                | 92 ++++++++++++++++++++++++++
 gnu/packages/patches/khmer-use-libraries.patch | 16 +++++
 3 files changed, 109 insertions(+)
 create mode 100644 gnu/packages/patches/khmer-use-libraries.patch

Comments

Leo Famulari June 17, 2016, 7:23 a.m. UTC | #1
On Fri, Jun 17, 2016 at 11:23:21AM +1000, Ben Woodcroft wrote:
> +    (build-system python-build-system)
> +    (arguments
> +     `(#:phases
> +       (modify-phases %standard-phases
> +         (add-after 'unpack 'set-paths

This phase deletes bundled libraries and then copies one of the
libraries back in from another package (murmur-hash).

Can khmer refer to murmur-hash without it being bundled at all?

> +           (lambda* (#:key inputs outputs #:allow-other-keys)
> +             ;; Delete bundled libraries.
> +             (delete-file-recursively "third-party/zlib")
> +             (delete-file-recursively "third-party/bzip2")
> +             ;; Replace bundled seqan.
> +             (let* ((seqan-all "third-party/seqan")
> +                    (seqan-include (string-append
> +                                    seqan-all "/core/include/seqan")))
> +               (delete-file-recursively seqan-all)
> +               (mkdir-p seqan-include)
> +               (rmdir seqan-include)

Here it makes the directory seqan-include and then removes it. Should it
be reversed? Would it be simpler to delete the directory and then use
copy-recursively, which I don't think requires mkdir-p?

> +               (copy-file (string-append (assoc-ref inputs "seqan")
> +                                         "/include/seqan")
> +                          seqan-include))
> +             ;; Replace bundled MurmurHash.

> +             (let ((smhasher "third-party/smhasher/"))
> +               (delete-file-recursively smhasher)
> +               (mkdir smhasher)
> +               (for-each
> +                (lambda (file)
> +                  (copy-file
> +                   (string-append
> +                    (assoc-ref inputs "murmur-hash") "/include/" file)
> +                   (string-append smhasher file)))
> +                (list "MurmurHash3.cpp" "MurmurHash3.h"))
> +               (rename-file
> +                (string-append smhasher "MurmurHash3.cpp")
> +                (string-append smhasher "MurmurHash3.cc")))
> +             (setenv "CC" "gcc")

I think this variable setting should be in its own phase.
Ben Woodcroft June 17, 2016, 11:41 p.m. UTC | #2
On 17/06/16 17:23, Leo Famulari wrote:
> On Fri, Jun 17, 2016 at 11:23:21AM +1000, Ben Woodcroft wrote:
>> +    (build-system python-build-system)
>> +    (arguments
>> +     `(#:phases
>> +       (modify-phases %standard-phases
>> +         (add-after 'unpack 'set-paths
> This phase deletes bundled libraries and then copies one of the
> libraries back in from another package (murmur-hash).
>
> Can khmer refer to murmur-hash without it being bundled at all?

By changing the Makefile or similar so that it refers to a 'murmur-hash' 
shared library? Maybe, but I don't think SMHasher creates one of these.

If you mean just referring to the code as it is in murmur-hash, then 
we'd have to change the Makefile so that it refers to that code. I 
figured copying the code in achieves the same thing with less effort on 
our part.

>> +           (lambda* (#:key inputs outputs #:allow-other-keys)
>> +             ;; Delete bundled libraries.
>> +             (delete-file-recursively "third-party/zlib")
>> +             (delete-file-recursively "third-party/bzip2")
>> +             ;; Replace bundled seqan.
>> +             (let* ((seqan-all "third-party/seqan")
>> +                    (seqan-include (string-append
>> +                                    seqan-all "/core/include/seqan")))
>> +               (delete-file-recursively seqan-all)
>> +               (mkdir-p seqan-include)
>> +               (rmdir seqan-include)
> Here it makes the directory seqan-include and then removes it. Should it
> be reversed? Would it be simpler to delete the directory and then use
> copy-recursively, which I don't think requires mkdir-p?

The issue is that there are other (unused) files in 'third-party/seqan'
that aren't useful code, and I wanted to be sure to delete the whole
bundle before starting the build. How about the slightly simplified

+             (let* ((seqan-all "third-party/seqan")
+                    (seqan-include (string-append
+                                    seqan-all "/core/include")))
+               (delete-file-recursively seqan-all)
+               (mkdir-p seqan-include)
+               (copy-file (string-append (assoc-ref inputs "seqan")
+                                         "/include/seqan")
+                          (string-append seqan-include "/seqan")))

It would be better if I knew a way of copying a directory into another
directory, like an "install-directory".

>
>> +               (copy-file (string-append (assoc-ref inputs "seqan")
>> +                                         "/include/seqan")
>> +                          seqan-include))
>> +             ;; Replace bundled MurmurHash.
>> +             (let ((smhasher "third-party/smhasher/"))
>> +               (delete-file-recursively smhasher)
>> +               (mkdir smhasher)
>> +               (for-each
>> +                (lambda (file)
>> +                  (copy-file
>> +                   (string-append
>> +                    (assoc-ref inputs "murmur-hash") "/include/" file)
>> +                   (string-append smhasher file)))
>> +                (list "MurmurHash3.cpp" "MurmurHash3.h"))
>> +               (rename-file
>> +                (string-append smhasher "MurmurHash3.cpp")
>> +                (string-append smhasher "MurmurHash3.cc")))
>> +             (setenv "CC" "gcc")
> I think this variable setting should be in its own phase.

OK.

ben
Leo Famulari June 25, 2016, 5:30 p.m. UTC | #3
On Sat, Jun 18, 2016 at 09:41:46AM +1000, Ben Woodcroft wrote:
> It would be better if I knew a way of copying directory into another
> directory, like an "install-directory".

How about copy-recursively, from (guix build utils)?
diff mbox

Patch

diff --git a/gnu/local.mk b/gnu/local.mk
index 55fea0e..bbbe986 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -590,6 +590,7 @@  dist_patch_DATA =						\
   %D%/packages/patches/jasper-CVE-2016-2089.patch		\
   %D%/packages/patches/jasper-CVE-2016-2116.patch		\
   %D%/packages/patches/jbig2dec-ignore-testtest.patch		\
+  %D%/packages/patches/khmer-use-libraries.patch		\
   %D%/packages/patches/kmod-module-directory.patch		\
   %D%/packages/patches/ldc-disable-tests.patch			\
   %D%/packages/patches/lftp-dont-save-unknown-host-fingerprint.patch \
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 22ed71a..7445d7b 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -2301,6 +2301,98 @@  command, or queried for specific k-mers with @code{jellyfish query}.")
     ;; files such as lib/jsoncpp.cpp are released under the Expat license.
     (license (list license:gpl3+ license:expat))))
 
+(define-public khmer
+  (package
+    (name "khmer")
+    (version "2.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append
+         "https://pypi.python.org/packages/"
+         "52/3b/2c52a13937197391775f274ed75b4a33b2d7767a904faaf4032e14e10a55/"
+         "khmer-" version ".tar.gz"))
+       (sha256
+        (base32
+         "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
+       (patches (search-patches "khmer-use-libraries.patch"))))
+    (build-system python-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'set-paths
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             ;; Delete bundled libraries.
+             (delete-file-recursively "third-party/zlib")
+             (delete-file-recursively "third-party/bzip2")
+             ;; Replace bundled seqan.
+             (let* ((seqan-all "third-party/seqan")
+                    (seqan-include (string-append
+                                    seqan-all "/core/include/seqan")))
+               (delete-file-recursively seqan-all)
+               ;; seqan's headers are presumably a directory, so copy them
+               ;; recursively; this also creates the missing parents.
+               (copy-recursively (string-append (assoc-ref inputs "seqan")
+                                                "/include/seqan")
+                                 seqan-include))
+             ;; Replace bundled MurmurHash.
+             (let ((smhasher "third-party/smhasher/"))
+               (delete-file-recursively smhasher)
+               (mkdir smhasher)
+               (for-each
+                (lambda (file)
+                  (copy-file
+                   (string-append
+                    (assoc-ref inputs "murmur-hash") "/include/" file)
+                   (string-append smhasher file)))
+                (list "MurmurHash3.cpp" "MurmurHash3.h"))
+               (rename-file
+                (string-append smhasher "MurmurHash3.cpp")
+                (string-append smhasher "MurmurHash3.cc")))
+             #t))
+         (add-after 'unpack 'set-cc
+           (lambda _
+             (setenv "CC" "gcc")
+             #t))
+         ;; It is simpler to test after installation.
+         (delete 'check)
+         (add-after 'install 'post-install-check
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (setenv "PATH"
+                       (string-append (getenv "PATH") ":" out "/bin"))
+               ;; Assumes python's store path ends in "X.Y.Z" (5 characters).
+               (setenv "PYTHONPATH"
+                       (string-append
+                        (getenv "PYTHONPATH")
+                        ":"
+                        out
+                        "/lib/python"
+                        (string-take (string-take-right
+                                      (assoc-ref inputs "python") 5) 3)
+                        "/site-packages"))
+               (with-directory-excursion "build"
+                 (zero? (system* "nosetests" "khmer" "--attr"
+                                 "!known_failing")))))))))
+    (native-inputs
+     `(("murmur-hash" ,murmur-hash)
+       ("seqan" ,seqan)
+       ("python-nose" ,python-nose)))
+    (inputs
+     `(("zlib" ,zlib)
+       ("bzip2" ,bzip2)
+       ("python-screed" ,python-screed)
+       ("python-bz2file" ,python-bz2file)))
+    (home-page "https://khmer.readthedocs.org/")
+    (synopsis "K-mer counting, filtering and graph traversal library")
+    (description "The khmer software is a set of command-line tools for
+working with DNA shotgun sequencing data from genomes, transcriptomes,
+metagenomes and single cells.  Khmer can make de novo assemblies faster, and
+sometimes better.  Khmer can also identify and fix problems with shotgun
+data.")
+    (license license:bsd-3)))
+
 (define-public macs
   (package
     (name "macs")
diff --git a/gnu/packages/patches/khmer-use-libraries.patch b/gnu/packages/patches/khmer-use-libraries.patch
new file mode 100644
index 0000000..47d163a
--- /dev/null
+++ b/gnu/packages/patches/khmer-use-libraries.patch
@@ -0,0 +1,16 @@ 
+Change setup.cfg so that the bundled zlib and bzip2 are not used.  This cannot
+currently be achieved using "--library z,bz2" as instructed in the setup.py.
+
+diff --git a/setup.cfg b/setup.cfg
+index c054092..080992e 100644
+--- a/setup.cfg
++++ b/setup.cfg
+@@ -1,7 +1,7 @@
+ [build_ext]
+ define = SEQAN_HAS_BZIP2,SEQAN_HAS_ZLIB
+ undef = NO_UNIQUE_RC
+-# libraries = z,bz2
++libraries = z,bz2
+ ## if using system libraries
+ include-dirs = lib:third-party/zlib:third-party/bzip2:third-party/seqan/core/include:third-party/smhasher
+ # include-dirs = lib