all messages for Guix-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Eric Bavier <bavier@cray.com>
To: guix-devel@gnu.org
Subject: fftw runtime cpu detection
Date: Thu, 5 Apr 2018 17:13:29 -0500	[thread overview]
Message-ID: <20180405221329.GT105827@pe06.us.cray.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1653 bytes --]

Hello Guix,

I recently discovered that the FFTW library can do runtime cpu
detection.  In order to do this, the package needs to be configured to
build SIMD "codelets", like our 'fftw-avx' currently does.  Then,
based on the instruction support detected at runtime, the library makes
those kernels available to the fftw "planner" for execution.

I tested this on two systems: 1) a system with sse2, and 2) a system
with avx2.  I configured the library with "--enable-sse2 --enable-avx
--enable-avx2", then ran the following on both systems:

1)
$ ./tests/bench --verbose=3 --verify 'ibcd11x7x6v10'
Planning ibcd11x7x6v10...
using plan_many_dft
estimate-planner time: 0.004355 s
using plan_many_dft
planner time: 0.035684 s
(dft-rank>=2/1
  (dft-vrank>=1-x11/1
    (dft-rank>=2/1
      (dft-vrank>=1-x7/1
        (dft-direct-6-x10 "n1bv_6_sse2"))
      (dft-direct-7-x60 "n1bv_7_sse2")))
  (dft-direct-11-x420 "n1bv_11_sse2"))
flops: 36800 add, 9700 mul, 26260 fma
estimated cost: 99057.699080, pcost = 115706.000000
ibcd11x7x6v10 4.33362e-16 7.27264e-16 8.46842e-16

2)
$ ./tests/bench --verbose=3 --verify 'ibcd11x7x6v10'
Planning ibcd11x7x6v10...
using plan_many_dft
estimate-planner time: 0.001485 s
using plan_many_dft
planner time: 0.025788 s
(dft-rank>=2/1
  (dft-rank>=2/1
    (dft-vrank>=1-x77/1
      (dft-direct-6-x10 "n1bv_6_sse2"))
    (dft-vrank>=1-x11/1
      (dft-direct-7-x60 "n1bv_7_avx")))
  (dft-direct-11-x420 "n1bv_11_avx"))
flops: 12280 add, 2810 mul, 6950 fma
estimated cost: 28996.283180, pcost = 40767.000000
ibcd11x7x6v10 2.24601e-07 3.90447e-07 2.42548e-07


The attached patch is a WIP.

-- 
Eric Bavier, Scientific Libraries, Cray Inc.

[-- Attachment #2: guix-fftw-codelets.patch --]
[-- Type: text/x-patch, Size: 3748 bytes --]

diff --git a/gnu/packages/algebra.scm b/gnu/packages/algebra.scm
index 2aa1777db..96c78ea81 100644
--- a/gnu/packages/algebra.scm
+++ b/gnu/packages/algebra.scm
@@ -533,17 +533,26 @@ a C program.")
     (build-system gnu-build-system)
     (arguments
      '(#:configure-flags
-       '("--enable-shared" "--enable-openmp" "--enable-threads")
-       #:phases (alist-cons-before
-                 'build 'no-native
-                 (lambda _
-                   ;; By default '-mtune=native' is used.  However, that may
-                   ;; cause the use of ISA extensions (SSE2, etc.) that are
-                   ;; not necessarily available on the user's machine when
-                   ;; that package is built on a different machine.
-                   (substitute* (find-files "." "Makefile$")
-                     (("-mtune=native") "")))
-                 %standard-phases)))
+       `("--enable-shared" "--enable-openmp" "--enable-threads"
+         ,@(let ((system (or (%current-target-system) (%current-system))))
+             (cond
+              ((or (string-prefix? "x86_64" system)
+                   (string-prefix? "i686" system))
+               ;; Enable AVX & co. for codelets.  See details at:
+               ;; <http://fftw.org/fftw3_doc/Installation-on-Unix.html>.
+               '("--enable-avx" "--enable-avx2"
+                 "--enable-avx512" "--enable-avx-128-fma"))
+              ((string-prefix? "aarch64" system)
+               '("--enable-neon" "--enable-armv8-cntvct-el0"))
+              ((string-prefix? "armv7" system)
+               '("--enable-neon" "--enable-armv7a-cntvct"))
+              ((string-prefix? "mips" system)
+               '("--enable-mips-zbus-timer"))))
+         ;; By default '-mtune=native' is used.  However, that may cause the
+         ;; use of ISA extensions (e.g. AVX) that are not necessarily
+         ;; available on the user's machine when that package is built on a
+         ;; different machine.
+         "ax_cv_c_flags__mtune_native=no")))
     (native-inputs `(("perl" ,perl)))
     (home-page "http://fftw.org")
     (synopsis "Computing the discrete Fourier transform")
@@ -560,7 +569,7 @@ cosine/ sine transforms or DCT/DST).")
     (arguments
      (substitute-keyword-arguments (package-arguments fftw)
        ((#:configure-flags cf)
-        `(cons "--enable-float" ,cf))))
+        `(cons  "--enable-float" "--enable-sse" ,cf))))
     (description
      (string-append (package-description fftw)
                     "  Single-precision version."))))
@@ -592,29 +601,6 @@ cosine/ sine transforms or DCT/DST).")
                (base32
                 "0wsms8narnbhfsa8chdflv2j9hzspvflblnqdn7hw8x5xdzrnq1v"))))))
 
-(define-public fftw-avx
-  (package
-    (inherit fftw-3.3.7)
-    (name "fftw-avx")
-    (arguments
-     (substitute-keyword-arguments (package-arguments fftw-3.3.7)
-       ((#:configure-flags flags ''())
-        ;; Enable AVX & co.  See details at:
-        ;; <http://fftw.org/fftw3_doc/Installation-on-Unix.html>.
-        `(append '("--enable-avx" "--enable-avx2" "--enable-avx512"
-                   "--enable-avx-128-fma")
-                 ,flags))
-       ((#:substitutable? _ #f)
-        ;; To run the tests, we must have a CPU that supports all these
-        ;; extensions.  Since we cannot be sure that machines in the build
-        ;; farm support them, disable substitutes altogether.
-        #f)
-       ((#:phases _)
-        ;; Since we're not providing binaries, let '-mtune=native' through.
-        '%standard-phases)))
-    (synopsis "Computing the discrete Fourier transform (AVX2-optimized)")
-    (supported-systems '("x86_64-linux"))))
-
 (define-public java-la4j
   (package
     (name "java-la4j")

             reply	other threads:[~2018-04-05 22:13 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-05 22:13 Eric Bavier [this message]
2018-04-06  7:54 ` fftw runtime cpu detection Chris Marusich
2018-04-06 15:08   ` Eric Bavier
2018-04-06  8:05 ` Ludovic Courtès
2018-04-06 15:02   ` Eric Bavier
2018-04-06 15:09     ` Ludovic Courtès
2018-04-06 18:37       ` Marius Bakke
2018-04-17 21:29         ` Eric Bavier
2018-04-18 21:36           ` Ludovic Courtès

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180405221329.GT105827@pe06.us.cray.com \
    --to=bavier@cray.com \
    --cc=guix-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/guix.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.