diff --git a/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt b/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
index 7330aa0ddb83f973d47071a4dba1a4c03baed242..a5cd53649d6ac5c011c026346d137dbcb862a20c 100644
--- a/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
+++ b/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
@@ -1,2 +1,7 @@
+waLBerla_add_executable ( NAME PackPerformance
+                          FILES PackPerformance.cpp
+                          DEPENDS core )
+
 waLBerla_add_executable ( NAME ProbeVsExtraMessage
+                          FILES ProbeVsExtraMessage.cpp
                           DEPENDS core postprocessing stencil )
diff --git a/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp b/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6479a3f641778ea859e126450c808febcce0d5e1
--- /dev/null
+++ b/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp
@@ -0,0 +1,125 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PackPerformance.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/BufferSystem.h"
+#include "core/mpi/MPIManager.h"
+#include "core/timing/TimingPool.h"
+
+#include <array>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+
+namespace walberla {
+
+// Micro-benchmark comparing three ways of packing a Vector3<real_t> into an mpi::SendBuffer:
+// the Vector3 stream operator ("native"), streaming the three components one by one
+// ("elementwise"), and a memcpy to the position returned by forward() ("memcpy").
+// Each variant packs numElements vectors into its own buffer; the timers' total() is logged.
+int main( int /*argc*/, char ** /*argv*/ )
+{
+   const size_t numElements = 100000000;
+   mpi::SendBuffer sb0;
+   mpi::SendBuffer sb1;
+   mpi::SendBuffer sb2;
+   Vector3<real_t> v(1,2,3);
+   WcTimer timer0;
+   WcTimer timer1;
+   WcTimer timer2;
+
+   // Untimed pass that fills all three buffers once before the measurements.
+   // NOTE(review): presumably this pre-grows the buffers so the timed loops do not pay for
+   // reallocation -- confirm that clear() keeps the allocated capacity.
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb0 << v;
+      sb1 << v;
+      sb2 << v;
+   }
+
+   WALBERLA_LOG_DEVEL_VAR(sb0.size());
+   sb0.clear();
+   sb1.clear();
+   sb2.clear();
+
+   // Variant 0: Vector3 stream operator.
+   timer0.start();
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb0 << v;
+   }
+   timer0.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb0.size());
+   sb0.clear();
+
+   // Variant 1: stream the three components individually.
+   timer1.start();
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb1 << v[0] << v[1] << v[2];
+   }
+   timer1.end();
+
+   // Log and clear the buffer this variant filled.
+   WALBERLA_LOG_DEVEL_VAR(sb1.size());
+   sb1.clear();
+
+   // Variant 2: obtain a write position via forward() and memcpy the raw components.
+   timer2.start();
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      auto pos = sb2.forward(sizeof(real_t) * 3);
+      std::memcpy(pos, v.data(), sizeof(real_t) * 3);
+   }
+   timer2.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb2.size());
+   sb2.clear();
+
+   //auto ptr0 = sb0.ptr();
+   //auto ptr1 = sb1.ptr();
+   //for (auto i = 0; i < numElements; ++i)
+   //{
+   //   WALBERLA_ASSERT_EQUAL(*ptr0, *ptr1);
+   //   ++ptr0;
+   //   ++ptr1;
+   //}
+
+   WALBERLA_LOG_DEVEL("native: " << timer0.total());
+   WALBERLA_LOG_DEVEL("elementwise: " << timer1.total());
+   WALBERLA_LOG_DEVEL("memcpy: " << timer2.total());
+
+   return 0;
+}
+
+} // namespace walberla
+
+// Program entry point: constructs the mpi::Environment, then runs walberla::main.
+int main( int argc, char* argv[] )
+{
+   walberla::mpi::Environment mpiEnv( argc, argv );
+   WALBERLA_UNUSED(mpiEnv);
+
+   return walberla::main( argc, argv );
+}
diff --git 
a/src/core/math/GenericAABB.h b/src/core/math/GenericAABB.h index 4664a48a3e821e356488629fbf70fa2b3fbda897..9b321e738f53548dba55094722425937967bef12 100644 --- a/src/core/math/GenericAABB.h +++ b/src/core/math/GenericAABB.h @@ -196,7 +196,11 @@ public: inline friend mpi::GenericRecvBuffer< ET > & operator>>( mpi::GenericRecvBuffer< ET > & buf, GenericAABB< T > & aabb ) { buf.readDebugMarker( "bb" ); - buf >> aabb.minCorner_ >> aabb.maxCorner_; + static_assert ( std::is_trivially_copyable< GenericAABB< T > >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.skip(sizeof(GenericAABB< T >)); + //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess + std::memcpy(static_cast<void*>(&aabb), pos, sizeof(GenericAABB< T >)); WALBERLA_ASSERT( aabb.checkInvariant() ); return buf; } diff --git a/src/core/math/GenericAABB.impl.h b/src/core/math/GenericAABB.impl.h index 7d24f164f2fbae77cbb715d3e4ac2fb35ba987ff..1fae800a88c6f7bcfb1d7a2b24cc9a30411c8066 100644 --- a/src/core/math/GenericAABB.impl.h +++ b/src/core/math/GenericAABB.impl.h @@ -1883,7 +1883,11 @@ template< typename T, // Element type of SendBuffer mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const GenericAABB< VT > & aabb ) { buf.addDebugMarker( "bb" ); - return buf << aabb.minCorner() << aabb.maxCorner(); + static_assert ( std::is_trivially_copyable< GenericAABB< VT > >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.forward(sizeof(GenericAABB< VT >)); + std::memcpy(pos, &aabb, sizeof(GenericAABB< VT >)); + return buf; } diff --git a/src/core/math/Matrix2.h b/src/core/math/Matrix2.h index 8e0504359bb722f912d159557034c5bdab9773e6..e3d8893333b3e8ba27f59b3565d551379b8fd774 100644 --- a/src/core/math/Matrix2.h +++ b/src/core/math/Matrix2.h @@ -958,9 +958,11 @@ template< typename T, // Element type of SendBuffer typename MT > // Element type of matrix mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix2<MT> & m ) { - for(unsigned int 
i=0; i<4; ++i) - buf << m[i]; - + buf.addDebugMarker( "m2" ); + static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.forward(sizeof(Matrix2<MT>)); + std::memcpy(pos, &m, sizeof(Matrix2<MT>)); return buf; } @@ -968,9 +970,12 @@ template< typename T, // Element type of RecvBuffer typename MT > // Element type of matrix mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix2<MT> & m ) { - for(unsigned int i=0; i<4; ++i) - buf >> m[i]; - + buf.readDebugMarker( "m2" ); + static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.skip(sizeof(Matrix2<MT>)); + //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess + std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix2<MT>)); return buf; } diff --git a/src/core/math/Matrix3.h b/src/core/math/Matrix3.h index 311a041200f49c08bb21c4f456c382dc2d0fb75b..88c7818b7926b05747cdaa09578aa8bbc32c6cb1 100644 --- a/src/core/math/Matrix3.h +++ b/src/core/math/Matrix3.h @@ -35,6 +35,7 @@ #include <type_traits> #include <algorithm> +#include <array> #include <cmath> #include <iostream> #include <limits> @@ -174,6 +175,7 @@ public: template< typename Other > inline const Vector3<HIGH> solve( const Vector3<Other> &rhs ) const; inline Type trace() const; - inline Type* data() {return v_;} + inline Type* data() {return v_.data();} + inline Type const * data() const {return v_.data();} //@} //******************************************************************************************************************* @@ -246,9 +248,9 @@ private: * 6 & 7 & 8 \\ * \end{array}\right)\f] **/ - Type v_[9] = {Type(1), Type(0), Type(0), - Type(0), Type(1), Type(0), - Type(0), Type(0), Type(1)}; + std::array<Type, 9> v_ = {{Type(1), Type(0), Type(0), + Type(0), Type(1), Type(0), + Type(0), Type(0), Type(1)}}; //@} 
//******************************************************************************************************************* }; @@ -1769,9 +1771,11 @@ namespace mpi { typename MT > // Element type of matrix mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix3<MT> & m ) { - for(unsigned int i=0; i<9; ++i) - buf << m[i]; - + buf.addDebugMarker( "m3" ); + static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.forward(sizeof(Matrix3<MT>)); + std::memcpy(pos, &m, sizeof(Matrix3<MT>)); return buf; } @@ -1779,9 +1783,12 @@ namespace mpi { typename MT > // Element type of matrix mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix3<MT> & m ) { - for(unsigned int i=0; i<9; ++i) - buf >> m[i]; - + buf.readDebugMarker( "m3" ); + static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.skip(sizeof(Matrix3<MT>)); + //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess + std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix3<MT>)); return buf; } diff --git a/src/core/math/Quaternion.h b/src/core/math/Quaternion.h index dd3a4298ab22a0fc814ab65a7e27b90321a5a888..90fd652de353026cab2612aed1be1570d754ea66 100644 --- a/src/core/math/Quaternion.h +++ b/src/core/math/Quaternion.h @@ -161,6 +161,8 @@ public: inline void rotateZ( Type angle ); inline void swap( Quaternion& q ) /* throw() */; inline const Vector3<Type> getEulerAnglesXYZ() const; + inline Type* data() {return v_;} + inline Type const * data() const {return v_;} //@} //********************************************************************************************** @@ -1097,7 +1099,10 @@ namespace mpi { mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const math::Quaternion<VT> & quat ) { 
buf.addDebugMarker( "q4" ); - buf << quat[0] << quat[1] << quat[2] << quat[3]; + static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.forward(sizeof(math::Quaternion<VT>)); + std::memcpy(pos, &quat, sizeof(math::Quaternion<VT>)); return buf; } @@ -1106,9 +1111,11 @@ namespace mpi { mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, math::Quaternion<VT> & quat ) { buf.readDebugMarker( "q4" ); - VT tmp1, tmp2, tmp3, tmp4; - buf >> tmp1 >> tmp2 >> tmp3 >> tmp4; - quat.set(tmp1, tmp2, tmp3, tmp4); + static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.skip(sizeof(math::Quaternion<VT>)); + //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess + std::memcpy(static_cast<void*>(&quat), pos, sizeof(math::Quaternion<VT>)); return buf; } diff --git a/src/core/math/Vector2.h b/src/core/math/Vector2.h index c9b9dba4be0d8c5e38fc9296ed037e47ced1559c..b62d58f4651a743e33a0b9c6f0e4f7c860d174dd 100644 --- a/src/core/math/Vector2.h +++ b/src/core/math/Vector2.h @@ -160,6 +160,7 @@ public: inline Type sqrLength() const; inline Vector2<Length> getNormalized() const; inline Type* data() {return v_;} + inline Type const * data() const {return v_;} //@} //******************************************************************************************************************* @@ -1609,7 +1610,10 @@ namespace mpi { mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector2<VT> & vec ) { buf.addDebugMarker( "v2" ); - buf << vec[0] << vec[1]; + static_assert ( std::is_trivially_copyable< Vector2<VT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.forward(sizeof(Vector2<VT>)); + std::memcpy(pos, &vec, sizeof(Vector2<VT>)); 
return buf; } @@ -1618,7 +1622,11 @@ namespace mpi { mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector2<VT> & vec ) { buf.readDebugMarker( "v2" ); - buf >> vec[0] >> vec[1] ; + static_assert ( std::is_trivially_copyable< Vector2<VT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.skip(sizeof(Vector2<VT>)); + //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess + std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector2<VT>)); return buf; } diff --git a/src/core/math/Vector3.h b/src/core/math/Vector3.h index c5ec3e7b8ccb7840fc59905c7d6eaa081661f4f2..267e90c38f7b9752820a8397d7729c36134e3c96 100644 --- a/src/core/math/Vector3.h +++ b/src/core/math/Vector3.h @@ -166,6 +166,7 @@ public: inline Vector3<Length> getNormalizedOrZero() const; inline void reset(); inline Type* data() {return v_;} + inline Type const * data() const {return v_;} //@} //******************************************************************************************************************* @@ -1864,7 +1865,10 @@ namespace mpi { mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector3<VT> & vec ) { buf.addDebugMarker( "v3" ); - buf << vec[0] << vec[1] << vec[2]; + static_assert ( std::is_trivially_copyable< Vector3<VT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.forward(sizeof(Vector3<VT>)); + std::memcpy(pos, &vec, sizeof(Vector3<VT>)); return buf; } @@ -1873,7 +1877,11 @@ namespace mpi { mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector3<VT> & vec ) { buf.readDebugMarker( "v3" ); - buf >> vec[0] >> vec[1] >> vec[2]; + static_assert ( std::is_trivially_copyable< Vector3<VT> >::value, + "type has to be trivially copyable for the memcpy to work correctly" ); + auto pos = buf.skip(sizeof(Vector3<VT>)); + //suppress 
https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess + std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector3<VT>)); return buf; } diff --git a/tests/core/math/GenericAABBTest.cpp b/tests/core/math/GenericAABBTest.cpp index ee6db55766e9528b00de3a7a6bb323ca0677ee3b..f1d386ade803ce23909bf47a5df4f6b1307618d2 100644 --- a/tests/core/math/GenericAABBTest.cpp +++ b/tests/core/math/GenericAABBTest.cpp @@ -102,7 +102,7 @@ void testNonEmptyAABB( const GenericAABB< T > & aabb ) WALBERLA_CHECK_EQUAL( tmpAABB, aabb.getIntersection( intersectingBox ) ); WALBERLA_CHECK_EQUAL( aabb.getIntersection( intersectingBox ), tmpAABB ); WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.volume() / T(8) ); - WALBERLA_CHECK_IDENTICAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) ); + WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) ); intersectingBox.init( aabb.minCorner() + aabb.sizes(), aabb.maxCorner() + aabb.sizes() ); tmpAABB = aabb; @@ -231,7 +231,7 @@ void testAnyAABB( const GenericAABB< T > & aabb ) WALBERLA_CHECK( aabb.intersectsClosedInterval( aabb ) ); WALBERLA_CHECK_EQUAL( aabb.getIntersection( aabb ), aabb ); - WALBERLA_CHECK_IDENTICAL( aabb.intersectionVolume( aabb ), aabb.volume() ); + WALBERLA_CHECK_FLOAT_EQUAL( aabb.intersectionVolume( aabb ), aabb.volume() ); WALBERLA_CHECK( aabb.isIdentical( aabb ) ); WALBERLA_CHECK( aabb.isEqual( aabb ) );