Commit 5c57da72 authored by Sebastian Eibl

using memcpy instead of packing individual elements is faster

parent c4101e63
# Executable PackPerformance, built from PackPerformance.cpp; depends on the core module.
waLBerla_add_executable ( NAME PackPerformance
FILES PackPerformance.cpp
DEPENDS core )
# Executable ProbeVsExtraMessage, built from ProbeVsExtraMessage.cpp;
# depends on the core, postprocessing and stencil modules.
waLBerla_add_executable ( NAME ProbeVsExtraMessage
FILES ProbeVsExtraMessage.cpp
DEPENDS core postprocessing stencil )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file PackPerformance.cpp
//! \author Sebastian Eibl <sebastian.eibl@fau.de>
//
//======================================================================================================================
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/math/Vector3.h"
#include "core/mpi/BufferSystem.h"
#include "core/mpi/MPIManager.h"
#include "core/timing/TimingPool.h"

#include <array>
#include <cstring>
#include <iostream>
#include <sstream>
namespace walberla {
int main( int /*argc*/, char ** /*argv*/ )
{
   // Micro-benchmark comparing three ways of packing a Vector3<real_t> into an
   // mpi::SendBuffer, each repeated numElements times:
   //   sb0 / timer0  "native"      : stream operator for the whole vector
   //   sb1 / timer1  "elementwise" : stream operator for each component
   //   sb2 / timer2  "memcpy"      : forward() + raw byte copy of the three components
   // The accumulated wall-clock time of each variant is logged at the end.
   // Always returns 0.
   const size_t numElements    = 100000000;
   const size_t bytesPerVector = sizeof(real_t) * 3;

   mpi::SendBuffer sb0;
   mpi::SendBuffer sb1;
   mpi::SendBuffer sb2;

   Vector3<real_t> v(1,2,3);

   WcTimer timer0;
   WcTimer timer1;
   WcTimer timer2;

   // Untimed pass that fills all three buffers once before anything is measured.
   // NOTE(review): presumably meant as a warm-up so that buffer growth is not part
   // of the timings; that only holds if clear() keeps the capacity -- confirm.
   for (size_t i = 0; i < numElements; ++i)
   {
      sb0 << v;
      sb1 << v;
      sb2 << v;
   }
   WALBERLA_LOG_DEVEL_VAR(sb0.size());
   sb0.clear();
   sb1.clear();
   sb2.clear();

   // Variant 0: whole-vector stream operator.
   timer0.start();
   for (size_t i = 0; i < numElements; ++i)
   {
      sb0 << v;
   }
   timer0.end();
   WALBERLA_LOG_DEVEL_VAR(sb0.size());
   sb0.clear();

   // Variant 1: one stream operation per component.
   timer1.start();
   for (size_t i = 0; i < numElements; ++i)
   {
      sb1 << v[0] << v[1] << v[2];
   }
   timer1.end();
   // Report and release the buffer that was actually filled by this variant
   // (previously sb0 was logged and cleared here by mistake).
   WALBERLA_LOG_DEVEL_VAR(sb1.size());
   sb1.clear();

   // Variant 2: take the position returned by forward() and copy the raw bytes there.
   timer2.start();
   for (size_t i = 0; i < numElements; ++i)
   {
      auto pos = sb2.forward(bytesPerVector);
      std::memcpy(pos, v.data(), bytesPerVector);
   }
   timer2.end();
   // Same correction as above: this variant works on sb2, not sb0.
   WALBERLA_LOG_DEVEL_VAR(sb2.size());
   sb2.clear();

   //auto ptr0 = sb0.ptr();
   //auto ptr1 = sb1.ptr();
   //for (auto i = 0; i < numElements; ++i)
   //{
   // WALBERLA_ASSERT_EQUAL(*ptr0, *ptr1);
   // ++ptr0;
   // ++ptr1;
   //}

   WALBERLA_LOG_DEVEL("native: " << timer0.total());
   WALBERLA_LOG_DEVEL("elementwise: " << timer1.total());
   WALBERLA_LOG_DEVEL("memcpy: " << timer2.total());
   return 0;
}
} // namespace walberla
int main( int argc, char* argv[] )
{
   // Program entry point. The environment object is constructed before
   // walberla::main runs and is destroyed only after its result has been computed.
   // NOTE(review): presumably this sets up and tears down MPI -- confirm in core/Environment.h.
   walberla::mpi::Environment mpiEnv( argc, argv );
   WALBERLA_UNUSED(mpiEnv);

   // Forward the command line unchanged and hand back the benchmark's exit code.
   const int exitCode = walberla::main( argc, argv );
   return exitCode;
}
......@@ -196,7 +196,10 @@ public:
inline friend mpi::GenericRecvBuffer< ET > & operator>>( mpi::GenericRecvBuffer< ET > & buf, GenericAABB< T > & aabb )
{
   // Unpacks an AABB from the receive buffer: reads the "bb" debug marker, then
   // overwrites aabb with sizeof(GenericAABB<T>) raw bytes taken from the buffer.
   // The former element-wise read of minCorner_/maxCorner_ is gone; keeping it next
   // to the memcpy would consume the payload twice.
   buf.readDebugMarker( "bb" );
   static_assert ( std::is_trivially_copyable< GenericAABB< T > >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // skip() is used here as "advance by N bytes and return where those bytes start".
   // NOTE(review): confirm against GenericRecvBuffer.
   auto pos = buf.skip(sizeof(GenericAABB< T >));
   // The void* cast matches the other receive operators and avoids GCC's -Wclass-memaccess.
   std::memcpy(static_cast<void*>(&aabb), pos, sizeof(GenericAABB< T >));
   WALBERLA_ASSERT( aabb.checkInvariant() );
   return buf;
}
......
......@@ -1883,7 +1883,11 @@ template< typename T, // Element type of SendBuffer
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const GenericAABB< VT > & aabb )
{
   // Packs an AABB into the send buffer: writes the "bb" debug marker, then copies
   // the object's sizeof(GenericAABB<VT>) raw bytes in one memcpy.
   // The former early "return buf << minCorner << maxCorner" is gone; it made the
   // memcpy path unreachable and did not match the memcpy-based receive operator.
   buf.addDebugMarker( "bb" );
   static_assert ( std::is_trivially_copyable< GenericAABB< VT > >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // forward() is used here as "reserve N bytes and return the write position".
   // NOTE(review): confirm against GenericSendBuffer.
   auto pos = buf.forward(sizeof(GenericAABB< VT >));
   std::memcpy(pos, &aabb, sizeof(GenericAABB< VT >));
   return buf;
}
......
......@@ -958,9 +958,11 @@ template< typename T, // Element type of SendBuffer
typename MT > // Element type of matrix
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix2<MT> & m )
{
   // Packs a Matrix2 into the send buffer: writes the "m2" debug marker, then copies
   // the object's sizeof(Matrix2<MT>) raw bytes in one memcpy.
   // The per-element loop is gone; together with the memcpy it stored the matrix twice.
   buf.addDebugMarker( "m2" );
   static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // forward() is used here as "reserve N bytes and return the write position".
   // NOTE(review): confirm against GenericSendBuffer.
   auto pos = buf.forward(sizeof(Matrix2<MT>));
   std::memcpy(pos, &m, sizeof(Matrix2<MT>));
   return buf;
}
......@@ -968,9 +970,12 @@ template< typename T, // Element type of RecvBuffer
typename MT > // Element type of matrix
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix2<MT> & m )
{
   // Unpacks a Matrix2 from the receive buffer: reads the "m2" debug marker, then
   // overwrites m with sizeof(Matrix2<MT>) raw bytes taken from the buffer.
   // The per-element loop is gone; together with the memcpy it consumed the payload twice.
   buf.readDebugMarker( "m2" );
   static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // skip() is used here as "advance by N bytes and return where those bytes start".
   // NOTE(review): confirm against GenericRecvBuffer.
   auto pos = buf.skip(sizeof(Matrix2<MT>));
   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
   std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix2<MT>));
   return buf;
}
......
......@@ -35,6 +35,7 @@
#include <type_traits>
#include <algorithm>
#include <array>
#include <cmath>
#include <iostream>
#include <limits>
......@@ -174,6 +175,7 @@ public:
template< typename Other > inline const Vector3<HIGH> solve( const Vector3<Other> &rhs ) const;
inline Type trace() const;
// Raw access to the first of the matrix elements held in v_.
// &v_[0] is used instead of returning v_ directly: a std::array does not convert
// to a pointer, while &v_[0] is valid for both a built-in array and std::array.
inline Type* data() {return &v_[0];}
inline Type const * data() const {return &v_[0];}
//@}
//*******************************************************************************************************************
......@@ -246,9 +248,9 @@ private:
* 6 & 7 & 8 \\
* \end{array}\right)\f]
**/
// Element storage, declared exactly once (the built-in array and the std::array
// declaration cannot coexist). Default-initialized to the identity matrix:
// ones at indices 0, 4 and 8, zeros elsewhere.
std::array<Type, 9> v_ = {{Type(1), Type(0), Type(0),
                           Type(0), Type(1), Type(0),
                           Type(0), Type(0), Type(1)}};
//@}
//*******************************************************************************************************************
};
......@@ -1769,9 +1771,11 @@ namespace mpi {
typename MT > // Element type of matrix
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix3<MT> & m )
{
   // Packs a Matrix3 into the send buffer: writes the "m3" debug marker, then copies
   // the object's sizeof(Matrix3<MT>) raw bytes in one memcpy.
   // The per-element loop is gone; together with the memcpy it stored the matrix twice.
   buf.addDebugMarker( "m3" );
   static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // forward() is used here as "reserve N bytes and return the write position".
   // NOTE(review): confirm against GenericSendBuffer.
   auto pos = buf.forward(sizeof(Matrix3<MT>));
   std::memcpy(pos, &m, sizeof(Matrix3<MT>));
   return buf;
}
......@@ -1779,9 +1783,12 @@ namespace mpi {
typename MT > // Element type of matrix
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix3<MT> & m )
{
   // Unpacks a Matrix3 from the receive buffer: reads the "m3" debug marker, then
   // overwrites m with sizeof(Matrix3<MT>) raw bytes taken from the buffer.
   // The per-element loop is gone; together with the memcpy it consumed the payload twice.
   buf.readDebugMarker( "m3" );
   static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // skip() is used here as "advance by N bytes and return where those bytes start".
   // NOTE(review): confirm against GenericRecvBuffer.
   auto pos = buf.skip(sizeof(Matrix3<MT>));
   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
   std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix3<MT>));
   return buf;
}
......
......@@ -161,6 +161,8 @@ public:
inline void rotateZ( Type angle );
inline void swap( Quaternion& q ) /* throw() */;
inline const Vector3<Type> getEulerAnglesXYZ() const;
// Raw access to the quaternion's component storage: both overloads return the
// member v_, as a mutable pointer and as a pointer-to-const respectively.
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//**********************************************************************************************
......@@ -1083,7 +1085,10 @@ namespace mpi {
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const math::Quaternion<VT> & quat )
{
   // Packs a quaternion into the send buffer: writes the "q4" debug marker, then
   // copies the object's sizeof(math::Quaternion<VT>) raw bytes in one memcpy.
   // The element-wise streaming of quat[0..3] is gone; together with the memcpy it
   // stored the quaternion twice.
   buf.addDebugMarker( "q4" );
   static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // forward() is used here as "reserve N bytes and return the write position".
   // NOTE(review): confirm against GenericSendBuffer.
   auto pos = buf.forward(sizeof(math::Quaternion<VT>));
   std::memcpy(pos, &quat, sizeof(math::Quaternion<VT>));
   return buf;
}
......@@ -1092,9 +1097,11 @@ namespace mpi {
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, math::Quaternion<VT> & quat )
{
   // Unpacks a quaternion from the receive buffer: reads the "q4" debug marker, then
   // overwrites quat with sizeof(math::Quaternion<VT>) raw bytes taken from the buffer.
   // The former read into four temporaries followed by quat.set(...) is gone;
   // together with the memcpy it consumed the payload twice.
   // NOTE(review): the raw copy bypasses whatever quat.set() did with its arguments
   // (not visible here) -- confirm nothing relied on that.
   buf.readDebugMarker( "q4" );
   static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // skip() is used here as "advance by N bytes and return where those bytes start".
   // NOTE(review): confirm against GenericRecvBuffer.
   auto pos = buf.skip(sizeof(math::Quaternion<VT>));
   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
   std::memcpy(static_cast<void*>(&quat), pos, sizeof(math::Quaternion<VT>));
   return buf;
}
......
......@@ -160,6 +160,7 @@ public:
inline Type sqrLength() const;
inline Vector2<Length> getNormalized() const;
// Raw access to the vector's component storage: both overloads return the
// member v_, as a mutable pointer and as a pointer-to-const respectively.
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//*******************************************************************************************************************
......@@ -1609,7 +1610,10 @@ namespace mpi {
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector2<VT> & vec )
{
   // Packs a Vector2 into the send buffer: writes the "v2" debug marker, then copies
   // the object's sizeof(Vector2<VT>) raw bytes in one memcpy.
   // The element-wise streaming of vec[0], vec[1] is gone; together with the memcpy
   // it stored the vector twice.
   buf.addDebugMarker( "v2" );
   static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // forward() is used here as "reserve N bytes and return the write position".
   // NOTE(review): confirm against GenericSendBuffer.
   auto pos = buf.forward(sizeof(Vector2<VT>));
   std::memcpy(pos, &vec, sizeof(Vector2<VT>));
   return buf;
}
......@@ -1618,7 +1622,11 @@ namespace mpi {
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector2<VT> & vec )
{
   // Unpacks a Vector2 from the receive buffer: reads the "v2" debug marker, then
   // overwrites vec with sizeof(Vector2<VT>) raw bytes taken from the buffer.
   // The element-wise read of vec[0], vec[1] is gone; together with the memcpy it
   // consumed the payload twice.
   buf.readDebugMarker( "v2" );
   static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // skip() is used here as "advance by N bytes and return where those bytes start".
   // NOTE(review): confirm against GenericRecvBuffer.
   auto pos = buf.skip(sizeof(Vector2<VT>));
   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
   std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector2<VT>));
   return buf;
}
......
......@@ -166,6 +166,7 @@ public:
inline Vector3<Length> getNormalizedOrZero() const;
inline void reset();
// Raw access to the vector's component storage: both overloads return the
// member v_, as a mutable pointer and as a pointer-to-const respectively.
inline Type* data() {return v_;}
inline Type const * data() const {return v_;}
//@}
//*******************************************************************************************************************
......@@ -1864,7 +1865,10 @@ namespace mpi {
mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector3<VT> & vec )
{
   // Packs a Vector3 into the send buffer: writes the "v3" debug marker, then copies
   // the object's sizeof(Vector3<VT>) raw bytes in one memcpy.
   // The element-wise streaming of vec[0..2] is gone; together with the memcpy it
   // stored the vector twice.
   buf.addDebugMarker( "v3" );
   static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // forward() is used here as "reserve N bytes and return the write position".
   // NOTE(review): confirm against GenericSendBuffer.
   auto pos = buf.forward(sizeof(Vector3<VT>));
   std::memcpy(pos, &vec, sizeof(Vector3<VT>));
   return buf;
}
......@@ -1873,7 +1877,11 @@ namespace mpi {
mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector3<VT> & vec )
{
   // Unpacks a Vector3 from the receive buffer: reads the "v3" debug marker, then
   // overwrites vec with sizeof(Vector3<VT>) raw bytes taken from the buffer.
   // The element-wise read of vec[0..2] is gone; together with the memcpy it
   // consumed the payload twice.
   buf.readDebugMarker( "v3" );
   static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
                   "type has to be trivially copyable for the memcpy to work correctly" );
   // skip() is used here as "advance by N bytes and return where those bytes start".
   // NOTE(review): confirm against GenericRecvBuffer.
   auto pos = buf.skip(sizeof(Vector3<VT>));
   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
   std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector3<VT>));
   return buf;
}
......
......@@ -102,7 +102,7 @@ void testNonEmptyAABB( const GenericAABB< T > & aabb )
WALBERLA_CHECK_EQUAL( tmpAABB, aabb.getIntersection( intersectingBox ) );
WALBERLA_CHECK_EQUAL( aabb.getIntersection( intersectingBox ), tmpAABB );
WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.volume() / T(8) );
WALBERLA_CHECK_IDENTICAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
intersectingBox.init( aabb.minCorner() + aabb.sizes(), aabb.maxCorner() + aabb.sizes() );
tmpAABB = aabb;
......@@ -231,7 +231,7 @@ void testAnyAABB( const GenericAABB< T > & aabb )
WALBERLA_CHECK( aabb.intersectsClosedInterval( aabb ) );
WALBERLA_CHECK_EQUAL( aabb.getIntersection( aabb ), aabb );
WALBERLA_CHECK_IDENTICAL( aabb.intersectionVolume( aabb ), aabb.volume() );
WALBERLA_CHECK_FLOAT_EQUAL( aabb.intersectionVolume( aabb ), aabb.volume() );
WALBERLA_CHECK( aabb.isIdentical( aabb ) );
WALBERLA_CHECK( aabb.isEqual( aabb ) );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment