Disable SIMD instructions by default. To enable them, compile with -DPOCKETFFT_NO_VECTORS=0; vector code is then used only where the header's existing checks allow it (GCC >= 5, Clang >= 5, AppleClang >= 9.1, not the Intel compiler) and a supported vector instruction set is available. diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index d75ada6..b2d0a23 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -39,6 +39,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef POCKETFFT_HDRONLY_H #define POCKETFFT_HDRONLY_H +#ifndef POCKETFFT_NO_VECTORS +#define POCKETFFT_NO_VECTORS 1 +#endif + #ifndef __cplusplus #error This file is C++ and requires a C++ compiler. #endif @@ -106,29 +110,29 @@ constexpr bool FORWARD = true, BACKWARD = false; // only enable vector support for gcc>=5.0 and clang>=5.0 -#ifndef POCKETFFT_NO_VECTORS -#define POCKETFFT_NO_VECTORS +#if !(POCKETFFT_NO_VECTORS) +#define POCKETFFT_NO_VECTORS 1 #if defined(__INTEL_COMPILER) // do nothing. This is necessary because this compiler also sets __GNUC__. #elif defined(__clang__) // AppleClang has their own version numbering #ifdef __apple_build_version__ # if (__clang_major__ > 9) || (__clang_major__ == 9 && __clang_minor__ >= 1) -# undef POCKETFFT_NO_VECTORS +#define POCKETFFT_NO_VECTORS 0 # endif #elif __clang_major__ >= 5 -# undef POCKETFFT_NO_VECTORS +#define POCKETFFT_NO_VECTORS 0 #endif #elif defined(__GNUC__) #if __GNUC__>=5 -#undef POCKETFFT_NO_VECTORS +#define POCKETFFT_NO_VECTORS 0 #endif #endif #endif template struct VLEN { static constexpr size_t val=1; }; -#ifndef POCKETFFT_NO_VECTORS +#if !(POCKETFFT_NO_VECTORS) #if (defined(__AVX512F__)) template<> struct VLEN { static constexpr size_t val=16; }; template<> struct VLEN { static constexpr size_t val=8; }; @@ -145,7 +149,7 @@ template<> struct VLEN { static constexpr size_t val=2; }; template<> struct VLEN { static constexpr size_t val=4; }; template<> struct VLEN { static constexpr size_t val=2; }; #else -#define POCKETFFT_NO_VECTORS +#define POCKETFFT_NO_VECTORS 1 #endif #endif @@ -180,7 +184,7 @@ template class arr T *p; size_t sz; -#if defined(POCKETFFT_NO_VECTORS) +#if POCKETFFT_NO_VECTORS 
static T *ralloc(size_t num) { if (num==0) return nullptr; @@ -3026,7 +3030,7 @@ class rev_iter template struct VTYPE {}; template using vtype_t = typename VTYPE::type; -#ifndef POCKETFFT_NO_VECTORS +#if !(POCKETFFT_NO_VECTORS) template<> struct VTYPE { using type = float __attribute__ ((vector_size (VLEN::val*sizeof(float)))); @@ -3139,7 +3143,7 @@ POCKETFFT_NOINLINE void general_nd(const cndarr &in, ndarr &out, auto storage = alloc_tmp(in.shape(), len, sizeof(T)); const auto &tin(iax==0? in : out); multi_iter it(tin, out, axes[iax]); -#ifndef POCKETFFT_NO_VECTORS +#if !(POCKETFFT_NO_VECTORS) if (vlen>1) while (it.remaining()>=vlen) { @@ -3245,7 +3249,7 @@ template POCKETFFT_NOINLINE void general_r2c( constexpr auto vlen = VLEN::val; auto storage = alloc_tmp(in.shape(), len, sizeof(T)); multi_iter it(in, out, axis); -#ifndef POCKETFFT_NO_VECTORS +#if !(POCKETFFT_NO_VECTORS) if (vlen>1) while (it.remaining()>=vlen) { @@ -3300,7 +3304,7 @@ template POCKETFFT_NOINLINE void general_c2r( constexpr auto vlen = VLEN::val; auto storage = alloc_tmp(out.shape(), len, sizeof(T)); multi_iter it(in, out, axis); -#ifndef POCKETFFT_NO_VECTORS +#if !(POCKETFFT_NO_VECTORS) if (vlen>1) while (it.remaining()>=vlen) {