Commit 34b71db9 authored by Martin Bauer

Merge branch 'OptimizedPackUnpack' into 'master'

using memcpy instead of packing individual elements is faster

See merge request !208
parents 56276b30 5c57da72
Pipeline #15933 passed with stages
in 222 minutes and 58 seconds
# Executable built from PackPerformance.cpp (times several ways of packing a
# Vector3 into a send buffer); declares a dependency on the core module only.
waLBerla_add_executable ( NAME PackPerformance
FILES PackPerformance.cpp
DEPENDS core )
# Executable built from ProbeVsExtraMessage.cpp; declares dependencies on the
# core, postprocessing and stencil modules.
waLBerla_add_executable ( NAME ProbeVsExtraMessage
FILES ProbeVsExtraMessage.cpp
DEPENDS core postprocessing stencil )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file PackPerformance.cpp
//! \author Sebastian Eibl <sebastian.eibl@fau.de>
//
//======================================================================================================================
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/math/Vector3.h"
#include "core/mpi/BufferSystem.h"
#include "core/mpi/MPIManager.h"
#include "core/timing/TimingPool.h"
#include <array>
#include <cstring>
#include <iostream>
#include <sstream>
namespace walberla {

//**********************************************************************************************************************
/*!\brief Micro-benchmark comparing three ways of packing a Vector3<real_t> into an mpi::SendBuffer.
 *
 * The following strategies are timed over the same number of elements:
 *  - "native":      the Vector3 stream operator (sb << v)
 *  - "elementwise": streaming the three components one by one
 *  - "memcpy":      advancing the buffer via forward() and copying the raw component data
 *
 * Every buffer is filled once before the measurements start and cleared again, so each timed loop
 * starts from an empty buffer that has already been used once.
 *
 * NOTE(review): all three buffers are filled simultaneously during the warm-up; with 1e8 elements this
 *               needs several GB of memory - confirm this is acceptable on the target machine.
 *
 * \return Always 0.
 */
//**********************************************************************************************************************
int main( int /*argc*/, char ** /*argv*/ )
{
   const size_t numElements = 100000000;
   const size_t vectorBytes = sizeof(real_t) * 3; // payload of one vector in the memcpy variant

   mpi::SendBuffer sb0; // used by the "native" variant
   mpi::SendBuffer sb1; // used by the "elementwise" variant
   mpi::SendBuffer sb2; // used by the "memcpy" variant

   Vector3<real_t> v(1,2,3);

   WcTimer timer0;
   WcTimer timer1;
   WcTimer timer2;

   // warm-up: fill every buffer once so that all variants are measured under the same conditions
   for (size_t i = 0; i < numElements; ++i)
   {
      sb0 << v;
      sb1 << v;
      sb2 << v;
   }
   WALBERLA_LOG_DEVEL_VAR(sb0.size());
   sb0.clear();
   sb1.clear();
   sb2.clear();

   // variant 0: Vector3 stream operator
   timer0.start();
   for (size_t i = 0; i < numElements; ++i)
   {
      sb0 << v;
   }
   timer0.end();
   WALBERLA_LOG_DEVEL_VAR(sb0.size());
   sb0.clear();

   // variant 1: one stream operation per component
   timer1.start();
   for (size_t i = 0; i < numElements; ++i)
   {
      sb1 << v[0] << v[1] << v[2];
   }
   timer1.end();
   // report and release the buffer that was actually filled by this variant (was sb0 by mistake)
   WALBERLA_LOG_DEVEL_VAR(sb1.size());
   sb1.clear();

   // variant 2: advance the buffer and copy the raw data in one go
   timer2.start();
   for (size_t i = 0; i < numElements; ++i)
   {
      auto pos = sb2.forward(vectorBytes);
      std::memcpy(pos, v.data(), vectorBytes);
   }
   timer2.end();
   // report and release the buffer that was actually filled by this variant (was sb0 by mistake)
   WALBERLA_LOG_DEVEL_VAR(sb2.size());
   sb2.clear();

   WALBERLA_LOG_DEVEL("native: " << timer0.total());
   WALBERLA_LOG_DEVEL("elementwise: " << timer1.total());
   WALBERLA_LOG_DEVEL("memcpy: " << timer2.total());

   return 0;
}

} // namespace walberla
/// Program entry point: creates the waLBerla MPI environment object and hands control to walberla::main.
int main( int argc, char* argv[] )
{
   // The environment object has to outlive the call below; it is not referenced otherwise.
   walberla::mpi::Environment mpiEnv( argc, argv );
   WALBERLA_UNUSED(mpiEnv);

   const int exitCode = walberla::main( argc, argv );
   return exitCode;
}
......@@ -196,7 +196,10 @@ public:
inline friend mpi::GenericRecvBuffer< ET > & operator>>( mpi::GenericRecvBuffer< ET > & buf, GenericAABB< T > & aabb )
{
buf.readDebugMarker( "bb" );
buf >> aabb.minCorner_ >> aabb.maxCorner_;
static_assert ( std::is_trivially_copyable< GenericAABB< T > >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.skip(sizeof(GenericAABB< T >));
std::memcpy(&aabb, pos, sizeof(GenericAABB< T >));
WALBERLA_ASSERT( aabb.checkInvariant() );
return buf;
}
......
......@@ -1883,7 +1883,11 @@ template< typename T, // Element type of SendBuffer
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const GenericAABB< VT > & aabb )
{
buf.addDebugMarker( "bb" );
return buf << aabb.minCorner() << aabb.maxCorner();
static_assert ( std::is_trivially_copyable< GenericAABB< VT > >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.forward(sizeof(GenericAABB< VT >));
std::memcpy(pos, &aabb, sizeof(GenericAABB< VT >));
return buf;
}
......
......@@ -958,9 +958,11 @@ template< typename T, // Element type of SendBuffer
typename MT > // Element type of matrix
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix2<MT> & m )
{
for(unsigned int i=0; i<4; ++i)
buf << m[i];
buf.addDebugMarker( "m2" );
static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.forward(sizeof(Matrix2<MT>));
std::memcpy(pos, &m, sizeof(Matrix2<MT>));
return buf;
}
......@@ -968,9 +970,12 @@ template< typename T, // Element type of RecvBuffer
typename MT > // Element type of matrix
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix2<MT> & m )
{
for(unsigned int i=0; i<4; ++i)
buf >> m[i];
buf.readDebugMarker( "m2" );
static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.skip(sizeof(Matrix2<MT>));
//suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix2<MT>));
return buf;
}
......
......@@ -35,6 +35,7 @@
#include <type_traits>
#include <algorithm>
#include <array>
#include <cmath>
#include <iostream>
#include <limits>
......@@ -174,6 +175,7 @@ public:
template< typename Other > inline const Vector3<HIGH> solve( const Vector3<Other> &rhs ) const;
inline Type trace() const;
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//*******************************************************************************************************************
......@@ -246,9 +248,9 @@ private:
* 6 & 7 & 8 \\
* \end{array}\right)\f]
**/
Type v_[9] = {Type(1), Type(0), Type(0),
Type(0), Type(1), Type(0),
Type(0), Type(0), Type(1)};
std::array<Type, 9> v_ = {{Type(1), Type(0), Type(0),
Type(0), Type(1), Type(0),
Type(0), Type(0), Type(1)}};
//@}
//*******************************************************************************************************************
};
......@@ -1769,9 +1771,11 @@ namespace mpi {
typename MT > // Element type of matrix
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix3<MT> & m )
{
for(unsigned int i=0; i<9; ++i)
buf << m[i];
buf.addDebugMarker( "m3" );
static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.forward(sizeof(Matrix3<MT>));
std::memcpy(pos, &m, sizeof(Matrix3<MT>));
return buf;
}
......@@ -1779,9 +1783,12 @@ namespace mpi {
typename MT > // Element type of matrix
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix3<MT> & m )
{
for(unsigned int i=0; i<9; ++i)
buf >> m[i];
buf.readDebugMarker( "m3" );
static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.skip(sizeof(Matrix3<MT>));
//suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix3<MT>));
return buf;
}
......
......@@ -161,6 +161,8 @@ public:
inline void rotateZ( Type angle );
inline void swap( Quaternion& q ) /* throw() */;
inline const Vector3<Type> getEulerAnglesXYZ() const;
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//**********************************************************************************************
......@@ -1097,7 +1099,10 @@ namespace mpi {
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const math::Quaternion<VT> & quat )
{
buf.addDebugMarker( "q4" );
buf << quat[0] << quat[1] << quat[2] << quat[3];
static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.forward(sizeof(math::Quaternion<VT>));
std::memcpy(pos, &quat, sizeof(math::Quaternion<VT>));
return buf;
}
......@@ -1106,9 +1111,11 @@ namespace mpi {
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, math::Quaternion<VT> & quat )
{
buf.readDebugMarker( "q4" );
VT tmp1, tmp2, tmp3, tmp4;
buf >> tmp1 >> tmp2 >> tmp3 >> tmp4;
quat.set(tmp1, tmp2, tmp3, tmp4);
static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.skip(sizeof(math::Quaternion<VT>));
//suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
std::memcpy(static_cast<void*>(&quat), pos, sizeof(math::Quaternion<VT>));
return buf;
}
......
......@@ -160,6 +160,7 @@ public:
inline Type sqrLength() const;
inline Vector2<Length> getNormalized() const;
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//*******************************************************************************************************************
......@@ -1609,7 +1610,10 @@ namespace mpi {
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector2<VT> & vec )
{
buf.addDebugMarker( "v2" );
buf << vec[0] << vec[1];
static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.forward(sizeof(Vector2<VT>));
std::memcpy(pos, &vec, sizeof(Vector2<VT>));
return buf;
}
......@@ -1618,7 +1622,11 @@ namespace mpi {
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector2<VT> & vec )
{
buf.readDebugMarker( "v2" );
buf >> vec[0] >> vec[1] ;
static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.skip(sizeof(Vector2<VT>));
//suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector2<VT>));
return buf;
}
......
......@@ -166,6 +166,7 @@ public:
inline Vector3<Length> getNormalizedOrZero() const;
inline void reset();
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//*******************************************************************************************************************
......@@ -1864,7 +1865,10 @@ namespace mpi {
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector3<VT> & vec )
{
buf.addDebugMarker( "v3" );
buf << vec[0] << vec[1] << vec[2];
static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.forward(sizeof(Vector3<VT>));
std::memcpy(pos, &vec, sizeof(Vector3<VT>));
return buf;
}
......@@ -1873,7 +1877,11 @@ namespace mpi {
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector3<VT> & vec )
{
buf.readDebugMarker( "v3" );
buf >> vec[0] >> vec[1] >> vec[2];
static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
"type has to be trivially copyable for the memcpy to work correctly" );
auto pos = buf.skip(sizeof(Vector3<VT>));
//suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector3<VT>));
return buf;
}
......
......@@ -102,7 +102,7 @@ void testNonEmptyAABB( const GenericAABB< T > & aabb )
WALBERLA_CHECK_EQUAL( tmpAABB, aabb.getIntersection( intersectingBox ) );
WALBERLA_CHECK_EQUAL( aabb.getIntersection( intersectingBox ), tmpAABB );
WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.volume() / T(8) );
WALBERLA_CHECK_IDENTICAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
intersectingBox.init( aabb.minCorner() + aabb.sizes(), aabb.maxCorner() + aabb.sizes() );
tmpAABB = aabb;
......@@ -231,7 +231,7 @@ void testAnyAABB( const GenericAABB< T > & aabb )
WALBERLA_CHECK( aabb.intersectsClosedInterval( aabb ) );
WALBERLA_CHECK_EQUAL( aabb.getIntersection( aabb ), aabb );
WALBERLA_CHECK_IDENTICAL( aabb.intersectionVolume( aabb ), aabb.volume() );
WALBERLA_CHECK_FLOAT_EQUAL( aabb.intersectionVolume( aabb ), aabb.volume() );
WALBERLA_CHECK( aabb.isIdentical( aabb ) );
WALBERLA_CHECK( aabb.isEqual( aabb ) );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment