all messages for Guix-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
* fftw runtime cpu detection
@ 2018-04-05 22:13 Eric Bavier
  2018-04-06  7:54 ` Chris Marusich
  2018-04-06  8:05 ` Ludovic Courtès
  0 siblings, 2 replies; 9+ messages in thread
From: Eric Bavier @ 2018-04-05 22:13 UTC (permalink / raw)
  To: guix-devel

[-- Attachment #1: Type: text/plain, Size: 1653 bytes --]

Hello Guix,

I recently discovered that the FFTW library can do runtime CPU
detection.  In order to do this, the package needs to be configured to
build SIMD "codelets", as our 'fftw-avx' package currently does.  Then,
based on the instruction support detected at runtime, the library makes
those kernels available to the fftw "planner" for execution.

I tested this on two systems: 1) a system with sse2, and 2) a system
with avx2.  I configured the library with "--enable-sse2 --enable-avx
--enable-avx2", then ran the following on both systems:

1)
$ ./tests/bench --verbose=3 --verify 'ibcd11x7x6v10'
Planning ibcd11x7x6v10...
using plan_many_dft
estimate-planner time: 0.004355 s
using plan_many_dft
planner time: 0.035684 s
(dft-rank>=2/1
  (dft-vrank>=1-x11/1
    (dft-rank>=2/1
      (dft-vrank>=1-x7/1
        (dft-direct-6-x10 "n1bv_6_sse2"))
      (dft-direct-7-x60 "n1bv_7_sse2")))
  (dft-direct-11-x420 "n1bv_11_sse2"))
flops: 36800 add, 9700 mul, 26260 fma
estimated cost: 99057.699080, pcost = 115706.000000
ibcd11x7x6v10 4.33362e-16 7.27264e-16 8.46842e-16

2)
$ ./tests/bench --verbose=3 --verify 'ibcd11x7x6v10'
Planning ibcd11x7x6v10...
using plan_many_dft
estimate-planner time: 0.001485 s
using plan_many_dft
planner time: 0.025788 s
(dft-rank>=2/1
  (dft-rank>=2/1
    (dft-vrank>=1-x77/1
      (dft-direct-6-x10 "n1bv_6_sse2"))
    (dft-vrank>=1-x11/1
      (dft-direct-7-x60 "n1bv_7_avx")))
  (dft-direct-11-x420 "n1bv_11_avx"))
flops: 12280 add, 2810 mul, 6950 fma
estimated cost: 28996.283180, pcost = 40767.000000
ibcd11x7x6v10 2.24601e-07 3.90447e-07 2.42548e-07


The attached patch is a WIP.

-- 
Eric Bavier, Scientific Libraries, Cray Inc.

[-- Attachment #2: guix-fftw-codelets.patch --]
[-- Type: text/x-patch, Size: 3748 bytes --]

diff --git a/gnu/packages/algebra.scm b/gnu/packages/algebra.scm
index 2aa1777db..96c78ea81 100644
--- a/gnu/packages/algebra.scm
+++ b/gnu/packages/algebra.scm
@@ -533,17 +533,26 @@ a C program.")
     (build-system gnu-build-system)
     (arguments
      '(#:configure-flags
-       '("--enable-shared" "--enable-openmp" "--enable-threads")
-       #:phases (alist-cons-before
-                 'build 'no-native
-                 (lambda _
-                   ;; By default '-mtune=native' is used.  However, that may
-                   ;; cause the use of ISA extensions (SSE2, etc.) that are
-                   ;; not necessarily available on the user's machine when
-                   ;; that package is built on a different machine.
-                   (substitute* (find-files "." "Makefile$")
-                     (("-mtune=native") "")))
-                 %standard-phases)))
+       `("--enable-shared" "--enable-openmp" "--enable-threads"
+         ,@(let ((system (or (%current-target-system) (%current-system))))
+             (cond
+              ((or (string-prefix? "x86_64" system)
+                   (string-prefix? "i686" system))
+               ;; Enable AVX & co. for codelets.  See details at:
+               ;; <http://fftw.org/fftw3_doc/Installation-on-Unix.html>.
+               '("--enable-avx" "--enable-avx2"
+                 "--enable-avx512" "--enable-avx-128-fma"))
+              ((string-prefix? "aarch64" system)
+               '("--enable-neon" "--enable-armv8-cntvct-el0"))
+              ((string-prefix? "armv7" system)
+               '("--enable-neon" "--enable-armv7a-cntvct"))
+              ((string-prefix? "mips" system)
+               '("--enable-mips-zbus-timer"))))
+         ;; By default '-mtune=native' is used.  However, that may cause the
+         ;; use of ISA extensions (e.g. AVX) that are not necessarily
+         ;; available on the user's machine when that package is built on a
+         ;; different machine.
+         "ax_cv_c_flags__mtune_native=no")))
     (native-inputs `(("perl" ,perl)))
     (home-page "http://fftw.org")
     (synopsis "Computing the discrete Fourier transform")
@@ -560,7 +569,7 @@ cosine/ sine transforms or DCT/DST).")
     (arguments
      (substitute-keyword-arguments (package-arguments fftw)
        ((#:configure-flags cf)
-        `(cons "--enable-float" ,cf))))
+        `(cons  "--enable-float" "--enable-sse" ,cf))))
     (description
      (string-append (package-description fftw)
                     "  Single-precision version."))))
@@ -592,29 +601,6 @@ cosine/ sine transforms or DCT/DST).")
                (base32
                 "0wsms8narnbhfsa8chdflv2j9hzspvflblnqdn7hw8x5xdzrnq1v"))))))
 
-(define-public fftw-avx
-  (package
-    (inherit fftw-3.3.7)
-    (name "fftw-avx")
-    (arguments
-     (substitute-keyword-arguments (package-arguments fftw-3.3.7)
-       ((#:configure-flags flags ''())
-        ;; Enable AVX & co.  See details at:
-        ;; <http://fftw.org/fftw3_doc/Installation-on-Unix.html>.
-        `(append '("--enable-avx" "--enable-avx2" "--enable-avx512"
-                   "--enable-avx-128-fma")
-                 ,flags))
-       ((#:substitutable? _ #f)
-        ;; To run the tests, we must have a CPU that supports all these
-        ;; extensions.  Since we cannot be sure that machines in the build
-        ;; farm support them, disable substitutes altogether.
-        #f)
-       ((#:phases _)
-        ;; Since we're not providing binaries, let '-mtune=native' through.
-        '%standard-phases)))
-    (synopsis "Computing the discrete Fourier transform (AVX2-optimized)")
-    (supported-systems '("x86_64-linux"))))
-
 (define-public java-la4j
   (package
     (name "java-la4j")

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-04-18 21:36 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-05 22:13 fftw runtime cpu detection Eric Bavier
2018-04-06  7:54 ` Chris Marusich
2018-04-06 15:08   ` Eric Bavier
2018-04-06  8:05 ` Ludovic Courtès
2018-04-06 15:02   ` Eric Bavier
2018-04-06 15:09     ` Ludovic Courtès
2018-04-06 18:37       ` Marius Bakke
2018-04-17 21:29         ` Eric Bavier
2018-04-18 21:36           ` Ludovic Courtès

Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/guix.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.