unofficial mirror of guix-devel@gnu.org 
 help / color / mirror / code / Atom feed
* fftw runtime cpu detection
@ 2018-04-05 22:13 Eric Bavier
  2018-04-06  7:54 ` Chris Marusich
  2018-04-06  8:05 ` Ludovic Courtès
  0 siblings, 2 replies; 9+ messages in thread
From: Eric Bavier @ 2018-04-05 22:13 UTC (permalink / raw)
  To: guix-devel

[-- Attachment #1: Type: text/plain, Size: 1653 bytes --]

Hello Guix,

I recently discovered that the FFTW library can do runtime cpu
detection.  In order to do this, the package needs to be configured to
build SIMD "codelets", like our 'fftw-avx' package currently does.  Then,
based on the instruction support detected at runtime, the library makes
those kernels available to the fftw "planner" for execution.

I tested this on two systems: 1) system with sse2, and 2) system with
avx2.  I configured the library with "--enable-sse2 --enable-avx
--enable-avx2", then ran the following on both systems:

1)
$ ./tests/bench --verbose=3 --verify 'ibcd11x7x6v10'
Planning ibcd11x7x6v10...
using plan_many_dft
estimate-planner time: 0.004355 s
using plan_many_dft
planner time: 0.035684 s
(dft-rank>=2/1
  (dft-vrank>=1-x11/1
    (dft-rank>=2/1
      (dft-vrank>=1-x7/1
        (dft-direct-6-x10 "n1bv_6_sse2"))
      (dft-direct-7-x60 "n1bv_7_sse2")))
  (dft-direct-11-x420 "n1bv_11_sse2"))
flops: 36800 add, 9700 mul, 26260 fma
estimated cost: 99057.699080, pcost = 115706.000000
ibcd11x7x6v10 4.33362e-16 7.27264e-16 8.46842e-16

2)
$ ./tests/bench --verbose=3 --verify 'ibcd11x7x6v10'
Planning ibcd11x7x6v10...
using plan_many_dft
estimate-planner time: 0.001485 s
using plan_many_dft
planner time: 0.025788 s
(dft-rank>=2/1
  (dft-rank>=2/1
    (dft-vrank>=1-x77/1
      (dft-direct-6-x10 "n1bv_6_sse2"))
    (dft-vrank>=1-x11/1
      (dft-direct-7-x60 "n1bv_7_avx")))
  (dft-direct-11-x420 "n1bv_11_avx"))
flops: 12280 add, 2810 mul, 6950 fma
estimated cost: 28996.283180, pcost = 40767.000000
ibcd11x7x6v10 2.24601e-07 3.90447e-07 2.42548e-07


The attached patch is a WIP.

-- 
Eric Bavier, Scientific Libraries, Cray Inc.

[-- Attachment #2: guix-fftw-codelets.patch --]
[-- Type: text/x-patch, Size: 3748 bytes --]

diff --git a/gnu/packages/algebra.scm b/gnu/packages/algebra.scm
index 2aa1777db..96c78ea81 100644
--- a/gnu/packages/algebra.scm
+++ b/gnu/packages/algebra.scm
@@ -533,17 +533,26 @@ a C program.")
     (build-system gnu-build-system)
     (arguments
      '(#:configure-flags
-       '("--enable-shared" "--enable-openmp" "--enable-threads")
-       #:phases (alist-cons-before
-                 'build 'no-native
-                 (lambda _
-                   ;; By default '-mtune=native' is used.  However, that may
-                   ;; cause the use of ISA extensions (SSE2, etc.) that are
-                   ;; not necessarily available on the user's machine when
-                   ;; that package is built on a different machine.
-                   (substitute* (find-files "." "Makefile$")
-                     (("-mtune=native") "")))
-                 %standard-phases)))
+       `("--enable-shared" "--enable-openmp" "--enable-threads"
+         ,@(let ((system (or (%current-target-system) (%current-system))))
+             (cond
+              ((or (string-prefix? "x86_64" system)
+                   (string-prefix? "i686" system))
+               ;; Enable AVX & co. for codelets.  See details at:
+               ;; <http://fftw.org/fftw3_doc/Installation-on-Unix.html>.
+               '("--enable-avx" "--enable-avx2"
+                 "--enable-avx512" "--enable-avx-128-fma"))
+              ((string-prefix? "aarch64" system)
+               '("--enable-neon" "--enable-armv8-cntvct-el0"))
+              ((string-prefix? "armv7" system)
+               '("--enable-neon" "--enable-armv7a-cntvct"))
+              ((string-prefix? "mips" system)
+               '("--enable-mips-zbus-timer"))))
+         ;; By default '-mtune=native' is used.  However, that may cause the
+         ;; use of ISA extensions (e.g. AVX) that are not necessarily
+         ;; available on the user's machine when that package is built on a
+         ;; different machine.
+         "ax_cv_c_flags__mtune_native=no")))
     (native-inputs `(("perl" ,perl)))
     (home-page "http://fftw.org")
     (synopsis "Computing the discrete Fourier transform")
@@ -560,7 +569,7 @@ cosine/ sine transforms or DCT/DST).")
     (arguments
      (substitute-keyword-arguments (package-arguments fftw)
        ((#:configure-flags cf)
-        `(cons "--enable-float" ,cf))))
+        `(cons  "--enable-float" "--enable-sse" ,cf))))
     (description
      (string-append (package-description fftw)
                     "  Single-precision version."))))
@@ -592,29 +601,6 @@ cosine/ sine transforms or DCT/DST).")
                (base32
                 "0wsms8narnbhfsa8chdflv2j9hzspvflblnqdn7hw8x5xdzrnq1v"))))))
 
-(define-public fftw-avx
-  (package
-    (inherit fftw-3.3.7)
-    (name "fftw-avx")
-    (arguments
-     (substitute-keyword-arguments (package-arguments fftw-3.3.7)
-       ((#:configure-flags flags ''())
-        ;; Enable AVX & co.  See details at:
-        ;; <http://fftw.org/fftw3_doc/Installation-on-Unix.html>.
-        `(append '("--enable-avx" "--enable-avx2" "--enable-avx512"
-                   "--enable-avx-128-fma")
-                 ,flags))
-       ((#:substitutable? _ #f)
-        ;; To run the tests, we must have a CPU that supports all these
-        ;; extensions.  Since we cannot be sure that machines in the build
-        ;; farm support them, disable substitutes altogether.
-        #f)
-       ((#:phases _)
-        ;; Since we're not providing binaries, let '-mtune=native' through.
-        '%standard-phases)))
-    (synopsis "Computing the discrete Fourier transform (AVX2-optimized)")
-    (supported-systems '("x86_64-linux"))))
-
 (define-public java-la4j
   (package
     (name "java-la4j")

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-05 22:13 fftw runtime cpu detection Eric Bavier
@ 2018-04-06  7:54 ` Chris Marusich
  2018-04-06 15:08   ` Eric Bavier
  2018-04-06  8:05 ` Ludovic Courtès
  1 sibling, 1 reply; 9+ messages in thread
From: Chris Marusich @ 2018-04-06  7:54 UTC (permalink / raw)
  To: Eric Bavier; +Cc: guix-devel

[-- Attachment #1: Type: text/plain, Size: 810 bytes --]

Eric Bavier <bavier@cray.com> writes:

> I recently discovered that the FFTW library can do runtime cpu
> detection.

Cool!  I'm not familiar with this library, but the patch seems pretty
reasonable to me.

> In order to do this, the package needs to be configured to build SIMD
> "codelets", like how our 'fftw-avx' currently does.  Then, based on
> the instruction support detected at runtime, make those kernels
> available to the fftw "planner" for execution.

So, if we choose the right configure flags at build time (for the given
architecture), then at runtime, the software will detect the CPU and
either perform better or make more features available.  Is that right?

I see you added the "--enable-sse" configure flag, and it was not present
before.  Why did you add it?

-- 
Chris

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-05 22:13 fftw runtime cpu detection Eric Bavier
  2018-04-06  7:54 ` Chris Marusich
@ 2018-04-06  8:05 ` Ludovic Courtès
  2018-04-06 15:02   ` Eric Bavier
  1 sibling, 1 reply; 9+ messages in thread
From: Ludovic Courtès @ 2018-04-06  8:05 UTC (permalink / raw)
  To: Eric Bavier; +Cc: guix-devel

Hello Eric,

Eric Bavier <bavier@cray.com> skribis:

> I recently discovered that the FFTW library can do runtime cpu
> detection.  In order to do this, the package needs to be configured to
> build SIMD "codelets", like how our 'fftw-avx' currently does.  Then,
> based on the instruction support detected at runtime, make those
> kernels available to the fftw "planner" for execution.

That’s really good news!  Thanks for testing it.

The patch LGTM.  Can you confirm that the planner won’t ever try to use
the AVX2 codelets, for instance when running the test suite on an x86_64
box that lacks AVX2?

If that’s the case, I’d be in favor of pushing this patch to core-updates.

Thanks,
Ludo’.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-06  8:05 ` Ludovic Courtès
@ 2018-04-06 15:02   ` Eric Bavier
  2018-04-06 15:09     ` Ludovic Courtès
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Bavier @ 2018-04-06 15:02 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guix-devel

On Fri, Apr 06, 2018 at 10:05:43AM +0200, Ludovic Courtès wrote:
> Hello Eric,
> 
> Eric Bavier <bavier@cray.com> skribis:
> 
> > I recently discovered that the FFTW library can do runtime cpu
> > detection.  In order to do this, the package needs to be configured to
> > build SIMD "codelets", like how our 'fftw-avx' currently does.  Then,
> > based on the instruction support detected at runtime, make those
> > kernels available to the fftw "planner" for execution.
> 
> That’s really good news!  Thanks for testing it.
> 
> The patch LGTM.  Can you confirm that the planner won’t ever try to use
> the AVX2 codelets, for instance when running the test suite on an x86_64
> box that lacks AVX2?

Yes, I've successfully run 'make check' on an sse2-only machine where
'--enable-avx' and '--enable-avx2' were configured.  I'll check on an
i686 machine tonight.

> If that’s the case, I’d be in favor of pushing this patch to core-updates.

Great.  I'll do some more testing.  Should I send a finalized patch to
guix-patches when it's ready?

-- 
Eric Bavier, Scientific Libraries, Cray Inc.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-06  7:54 ` Chris Marusich
@ 2018-04-06 15:08   ` Eric Bavier
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Bavier @ 2018-04-06 15:08 UTC (permalink / raw)
  To: Chris Marusich; +Cc: guix-devel

On Fri, Apr 06, 2018 at 12:54:19AM -0700, Chris Marusich wrote:
> Eric Bavier <bavier@cray.com> writes:
> 
> > I recently discovered that the FFTW library can do runtime cpu
> > detection.
> 
> Cool!  I'm not familiar with this library, but the patch seems pretty
> reasonable to me.

Thanks for looking at it.

> > In order to do this, the package needs to be configured to build SIMD
> > "codelets", like how our 'fftw-avx' currently does.  Then, based on
> > the instruction support detected at runtime, make those kernels
> > available to the fftw "planner" for execution.
> 
> So, if we choose the right configure flags at build time (for the given
> architecture), then at runtime, the software will detect the CPU and
> either perform better or make more features available.  Is that right?

That's the idea, yes.  The simd kernels will execute if the cpu
supports them and the fftw planner finds they are faster in practice
than other kernels.

> 
> I see you added the "--enable-sse" configure flag, and it was not present
> before.  Why did you add it?
> 

In the documentation, I had seen it listed as the simd flag for
single-precision.  But now I see a comment in configure.ac that says
the --enable-sse2 flag Does The Right Thing when --enable-float is
given, so it can be left out.  Thanks for checking!

-- 
Eric Bavier, Scientific Libraries, Cray Inc.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-06 15:02   ` Eric Bavier
@ 2018-04-06 15:09     ` Ludovic Courtès
  2018-04-06 18:37       ` Marius Bakke
  0 siblings, 1 reply; 9+ messages in thread
From: Ludovic Courtès @ 2018-04-06 15:09 UTC (permalink / raw)
  To: Eric Bavier; +Cc: guix-devel

Eric Bavier <bavier@cray.com> skribis:

> On Fri, Apr 06, 2018 at 10:05:43AM +0200, Ludovic Courtès wrote:
>> Hello Eric,
>> 
>> Eric Bavier <bavier@cray.com> skribis:
>> 
>> > I recently discovered that the FFTW library can do runtime cpu
>> > detection.  In order to do this, the package needs to be configured to
>> > build SIMD "codelets", like how our 'fftw-avx' currently does.  Then,
>> > based on the instruction support detected at runtime, make those
>> > kernels available to the fftw "planner" for execution.
>> 
>> That’s really good news!  Thanks for testing it.
>> 
>> The patch LGTM.  Can you confirm that the planner won’t ever try to use
>> the AVX2 codelets, for instance when running the test suite on an x86_64
>> box that lacks AVX2?
>
> Yes, I've successfully run 'make check' on an sse2-only machine where
> '--enable-avx' and '--enable-avx2' were configured.  I'll check on an
> i686 machine tonight.

OK.

>> If that’s the case, I’d be in favor of pushing this patch to core-updates.
>
> Great.  I'll do some more testing.  Should I send a finalized patch to
> guix-patches when it's ready?

If Marius has no objections, I think you could push it directly to
core-updates.

Thank you,
Ludo’.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-06 15:09     ` Ludovic Courtès
@ 2018-04-06 18:37       ` Marius Bakke
  2018-04-17 21:29         ` Eric Bavier
  0 siblings, 1 reply; 9+ messages in thread
From: Marius Bakke @ 2018-04-06 18:37 UTC (permalink / raw)
  To: Ludovic Courtès, Eric Bavier; +Cc: guix-devel

[-- Attachment #1: Type: text/plain, Size: 602 bytes --]

Ludovic Courtès <ludovic.courtes@inria.fr> writes:

> Eric Bavier <bavier@cray.com> skribis:
>
>> On Fri, Apr 06, 2018 at 10:05:43AM +0200, Ludovic Courtès wrote:
>>
>>> If that’s the case, I’d be in favor of pushing this patch to core-updates.
>>
>> Great.  I'll do some more testing.  Should I send a finalized patch to
>> guix-patches when it's ready?
>
> If Marius has no objections, I think you could push it directly to
> core-updates.

Sounds good to me.  I just pushed a couple of full-rebuild commits to
fix bootstrap-tarballs, so the Big Rebuild is still some days off.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 487 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-06 18:37       ` Marius Bakke
@ 2018-04-17 21:29         ` Eric Bavier
  2018-04-18 21:36           ` Ludovic Courtès
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Bavier @ 2018-04-17 21:29 UTC (permalink / raw)
  To: Marius Bakke; +Cc: Eric Bavier, guix-devel, Ludovic Courtès

[-- Attachment #1: Type: text/plain, Size: 1022 bytes --]

Hello Guix,

On Fri, 06 Apr 2018 20:37:42 +0200
Marius Bakke <mbakke@fastmail.com> wrote:

> Ludovic Courtès <ludovic.courtes@inria.fr> writes:
> 
> > Eric Bavier <bavier@cray.com> skribis:
> >  
> >> On Fri, Apr 06, 2018 at 10:05:43AM +0200, Ludovic Courtès wrote:
> >>  
> >>> If that’s the case, I’d be in favor of pushing this patch to core-updates.  
> >>
> >> Great.  I'll do some more testing.  Should I send a finalized patch to
> >> guix-patches when it's ready?  
> >
> > If Marius has no objections, I think you could push it directly to
> > core-updates.  
> 
> Sounds good to me.  I just pushed a couple of full-rebuild commits to
> fix bootstrap-tarballs, so the Big Rebuild is still some days off.

I just pushed commit 65bb22796f854cbc3eae053a80b1d64365dad376 to
core-updates.  I built a good portion of fftwf's dependents to check
things out.  The dependent audio libraries seemed to pass their tests
on the build machine (an avx cpu), which gives me confidence.

`~Eric

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: fftw runtime cpu detection
  2018-04-17 21:29         ` Eric Bavier
@ 2018-04-18 21:36           ` Ludovic Courtès
  0 siblings, 0 replies; 9+ messages in thread
From: Ludovic Courtès @ 2018-04-18 21:36 UTC (permalink / raw)
  To: Eric Bavier; +Cc: Eric Bavier, guix-devel

Eric Bavier <ericbavier@centurylink.net> skribis:

> Hello Guix,
>
> On Fri, 06 Apr 2018 20:37:42 +0200
> Marius Bakke <mbakke@fastmail.com> wrote:
>
>> Ludovic Courtès <ludovic.courtes@inria.fr> writes:
>> 
>> > Eric Bavier <bavier@cray.com> skribis:
>> >  
>> >> On Fri, Apr 06, 2018 at 10:05:43AM +0200, Ludovic Courtès wrote:
>> >>  
>> >>> If that’s the case, I’d be in favor of pushing this patch to core-updates.  
>> >>
>> >> Great.  I'll do some more testing.  Should I send a finalized patch to
>> >> guix-patches when it's ready?  
>> >
>> > If Marius has no objections, I think you could push it directly to
>> > core-updates.  
>> 
>> Sounds good to me.  I just pushed a couple of full-rebuild commits to
>> fix bootstrap-tarballs, so the Big Rebuild is still some days off.
>
> I just pushed commit 65bb22796f854cbc3eae053a80b1d64365dad376 to
> core-updates.  I built a good portion of fftwf's dependents to check
> things out.  The dependent audio libraries seemed to pass their tests
> on the build machine (an avx cpu), which gives me confidence.

Awesome, thank you!

Ludo’.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-04-18 21:36 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-05 22:13 fftw runtime cpu detection Eric Bavier
2018-04-06  7:54 ` Chris Marusich
2018-04-06 15:08   ` Eric Bavier
2018-04-06  8:05 ` Ludovic Courtès
2018-04-06 15:02   ` Eric Bavier
2018-04-06 15:09     ` Ludovic Courtès
2018-04-06 18:37       ` Marius Bakke
2018-04-17 21:29         ` Eric Bavier
2018-04-18 21:36           ` Ludovic Courtès

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).