diff --git a/src/simd/AVX.h b/src/simd/AVX.h index b8344fdcefb0313bd2818fd6cad526fdc8689557..f54100977122b25bb9e5b735c0393149001647e4 100644 --- a/src/simd/AVX.h +++ b/src/simd/AVX.h @@ -56,6 +56,9 @@ namespace avx { inline double getComponent ( const double4_t & v, int i ) { return reinterpret_cast<const double*>(&v)[i]; } inline double getComponent ( const double4_t & v, unsigned long i ) { return reinterpret_cast<const double*>(&v)[i]; } + inline bool getBoolComponent ( const double4_t & v, int i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline bool getBoolComponent ( const double4_t & v, unsigned long i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline double4_t hadd( double4_t a, double4_t b ) { return _mm256_hadd_pd ( a,b); } inline double4_t horizontalSum ( double4_t a ) diff --git a/src/simd/AVX2.h b/src/simd/AVX2.h index b6e969662c9b1c36f653c3dd83fad162797e9e8d..552cbb645b1f56daf13c5be564f61894861a1815 100644 --- a/src/simd/AVX2.h +++ b/src/simd/AVX2.h @@ -58,6 +58,9 @@ namespace avx2 { inline double getComponent ( const double4_t & v, int i ) { return reinterpret_cast<const double*>(&v)[i]; } inline double getComponent ( const double4_t & v, unsigned long i ) { return reinterpret_cast<const double*>(&v)[i]; } + inline bool getBoolComponent ( const double4_t & v, int i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline bool getBoolComponent ( const double4_t & v, unsigned long i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline double4_t hadd( double4_t a, double4_t b ) { return _mm256_hadd_pd ( a,b); } inline double4_t horizontalSum ( double4_t a ) diff --git a/src/simd/QPX.h b/src/simd/QPX.h index 1108423bb7505fa0a8e09b98b82f5d431e7c4fb8..cccada7b3d28112efe480a21decc40eecc24dac1 100644 --- a/src/simd/QPX.h +++ b/src/simd/QPX.h @@ -81,6 +81,9 @@ inline void loadNeighbors( const double * mem_addr, double4_t & r_left, double4_ inline double getComponent ( const double4_t & v, int i ) { return v[i]; } inline double getComponent ( const double4_t & v, unsigned long i ) { return v[i]; } +inline bool getBoolComponent ( const double4_t & v, int i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } +inline bool getBoolComponent ( const double4_t & v, unsigned long i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline double4_t hadd( double4_t a, double4_t b ) { //TODO is there an instruction for this? diff --git a/src/simd/SSE2.h b/src/simd/SSE2.h index 396c5c693206527d779477f978af0b33c959c431..2f3ff942510a9768665cb56ac4bf4ae22a96c49f 100644 --- a/src/simd/SSE2.h +++ b/src/simd/SSE2.h @@ -120,6 +120,9 @@ namespace sse2 { inline double getComponent ( const double4_t & v, int i ) { return reinterpret_cast<const double*>(&v)[i]; } inline double getComponent ( const double4_t & v, unsigned long i ) { return reinterpret_cast<const double*>(&v)[i]; } + inline bool getBoolComponent ( const double4_t & v, int i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline bool getBoolComponent ( const double4_t & v, unsigned long i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline double4_t hadd( double4_t a, double4_t b ) { double4_t res; res.low = _mm_set_pd( getComponent(b,0) + getComponent(b,1), @@ -222,10 +225,10 @@ namespace sse2 { } inline double4_t blendv( double4_t a, double4_t b, double4_t mask) { - return make_double4 ( (uint64_t)(getComponent(mask,3)) ? getComponent(b,3) : getComponent(a,3), - (uint64_t)(getComponent(mask,2)) ? getComponent(b,2) : getComponent(a,2), - (uint64_t)(getComponent(mask,1)) ? getComponent(b,1) : getComponent(a,1), - (uint64_t)(getComponent(mask,0)) ? getComponent(b,0) : getComponent(a,0) + return make_double4 ( getBoolComponent(mask,3) ? getComponent(b,3) : getComponent(a,3), + getBoolComponent(mask,2) ? getComponent(b,2) : getComponent(a,2), + getBoolComponent(mask,1) ? getComponent(b,1) : getComponent(a,1), + getBoolComponent(mask,0) ? getComponent(b,0) : getComponent(a,0) ); } diff --git a/src/simd/SSE4.h b/src/simd/SSE4.h index 95ab434410d182c4e9d5d83b80c36e19cb4b85ba..831fd98ea55da681a769ce0291fabfa7c3710b3f 100644 --- a/src/simd/SSE4.h +++ b/src/simd/SSE4.h @@ -120,6 +120,9 @@ namespace sse4 { inline double getComponent ( const double4_t & v, int i ) { return reinterpret_cast<const double*>(&v)[i]; } inline double getComponent ( const double4_t & v, unsigned long i ) { return reinterpret_cast<const double*>(&v)[i]; } + inline bool getBoolComponent ( const double4_t & v, int i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline bool getBoolComponent ( const double4_t & v, unsigned long i ) { return (reinterpret_cast<const uint64_t*>(&v)[i]) != 0; } + inline double4_t hadd( double4_t a, double4_t b ) { double4_t res; res.low = _mm_hadd_pd( a.low , b.low ); diff --git a/src/simd/Scalar.h b/src/simd/Scalar.h index d0a30684606597c7b3ccae58f50111018ddd2202..96c37137b93c0d756e7e9370f16237850f226037 100644 --- a/src/simd/Scalar.h +++ b/src/simd/Scalar.h @@ -93,6 +93,9 @@ inline void loadNeighbors( const double * p, double4_t & r_left, double4_t & r_c inline double getComponent ( const double4_t & v, int i ) { return v[(unsigned int)(i)]; } inline double getComponent ( const double4_t & v, unsigned long i ) { return v[(unsigned int)(i)]; } +inline bool getBoolComponent ( const double4_t & v, int i ) { return (v.asUInt((unsigned int)(i))) != 0; } +inline bool getBoolComponent ( const double4_t & v, unsigned long i ) { return (v.asUInt((unsigned int)(i))) != 0; } + inline double4_t hadd( double4_t a, double4_t b ) diff --git a/tests/simd/CMakeLists.txt b/tests/simd/CMakeLists.txt index cf0f461d4116a967c29f9df2f17580bc48db7a6f..060dd5b616511af6b610875464cab33a6deb2c60 100644 --- a/tests/simd/CMakeLists.txt +++ b/tests/simd/CMakeLists.txt @@ -25,6 +25,10 @@ waLBerla_compile_test( NAME AVX_SSE4_Equivalence FILES SIMD_Equivalence.cpp set_property ( TARGET AVX_SSE4_Equivalence PROPERTY COMPILE_FLAGS "${MarchNativeString} -DIS0_AVX -DIS1_SSE4" ) waLBerla_execute_test( NAME AVX_SSE4_Equivalence ) +waLBerla_compile_test( NAME AVX_SSE2_Equivalence FILES SIMD_Equivalence.cpp ) +set_property ( TARGET AVX_SSE2_Equivalence PROPERTY COMPILE_FLAGS "${MarchNativeString} -DIS0_AVX -DIS1_SSE2" ) +waLBerla_execute_test( NAME AVX_SSE2_Equivalence ) + waLBerla_compile_test( NAME SSE4_SSE2_Equivalence FILES SIMD_Equivalence.cpp ) set_property ( TARGET SSE4_SSE2_Equivalence PROPERTY COMPILE_FLAGS "${MarchNativeString} -DIS0_SSE2 -DIS1_SSE4" ) diff --git a/tests/simd/SIMD_Equivalence.cpp b/tests/simd/SIMD_Equivalence.cpp index c8db56040cd3344a904d9976773f0860395e1235..5aeec8ef8052ab51faf3e85efc306b3c77432c83 100644 --- a/tests/simd/SIMD_Equivalence.cpp +++ b/tests/simd/SIMD_Equivalence.cpp @@ -169,6 +169,22 @@ using namespace walberla; using namespace simd; +void print0( const is0::double4_t& vec) +{ + WALBERLA_LOG_DEVEL( is0::getComponent(vec, 0 ) ); + WALBERLA_LOG_DEVEL( is0::getComponent(vec, 1 ) ); + WALBERLA_LOG_DEVEL( is0::getComponent(vec, 2 ) ); + WALBERLA_LOG_DEVEL( is0::getComponent(vec, 3 ) ); +} + +void print1( const is1::double4_t& vec) +{ + WALBERLA_LOG_DEVEL( is1::getComponent(vec, 0 ) ); + WALBERLA_LOG_DEVEL( is1::getComponent(vec, 1 ) ); + WALBERLA_LOG_DEVEL( is1::getComponent(vec, 2 ) ); + WALBERLA_LOG_DEVEL( is1::getComponent(vec, 3 ) ); +} + void checkVecEqual ( is0::double4_t a, is1::double4_t b, const std::string & description = "" ) { if ( description.size() > 0) @@ -234,6 +250,14 @@ void comparisonAndBlend() is0::double4_t maskvA = is0::compareGE( inA, is0::make_double4( 3.0) ); is1::double4_t maskvB = is1::compareGE( inB, is1::make_double4( 3.0) ); + WALBERLA_LOG_DEVEL("-------------------"); + print0(maskvA); + print1(maskvB); + WALBERLA_LOG_DEVEL("-------------------"); + print0(is0::blendv(inA, is0::make_zero(), maskvA )); + print1(is1::blendv(inB, is1::make_zero(), maskvB )); + WALBERLA_LOG_DEVEL("-------------------"); + checkVecEqual( is0::blendv(inA, is0::make_zero(), maskvA ), is1::blendv(inB, is1::make_zero(), maskvB ), "comparisonAndBlend");