diff --git a/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt b/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
index 7330aa0ddb83f973d47071a4dba1a4c03baed242..a5cd53649d6ac5c011c026346d137dbcb862a20c 100644
--- a/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
+++ b/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
@@ -1,2 +1,7 @@
+waLBerla_add_executable ( NAME PackPerformance 
+                          FILES PackPerformance.cpp
+                          DEPENDS core )
+
 waLBerla_add_executable ( NAME ProbeVsExtraMessage 
+                          FILES ProbeVsExtraMessage.cpp
                           DEPENDS core postprocessing stencil )
diff --git a/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp b/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6479a3f641778ea859e126450c808febcce0d5e1
--- /dev/null
+++ b/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp
@@ -0,0 +1,112 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PackPerformance.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/BufferSystem.h"
+#include "core/mpi/MPIManager.h"
+#include "core/timing/TimingPool.h"
+
+#include <array>
+#include <iostream>
+#include <sstream>
+
+namespace walberla {
+
+int main( int /*argc*/, char ** /*argv*/ ) // times three ways of packing a Vector3<real_t> into a SendBuffer
+{
+   const size_t numElements = 100000000;
+   mpi::SendBuffer sb0; // target of the whole-vector operator<< variant
+   mpi::SendBuffer sb1; // target of the component-by-component variant
+   mpi::SendBuffer sb2; // target of the forward() + memcpy variant
+   Vector3<real_t> v(1,2,3);
+   WcTimer timer0;
+   WcTimer timer1;
+   WcTimer timer2;
+
+   for (size_t i = 0; i < numElements; ++i) // untimed pass over all three buffers before any measurement
+   {
+      sb0 << v;
+      sb1 << v;
+      sb2 << v;
+   }
+
+   WALBERLA_LOG_DEVEL_VAR(sb0.size());
+   sb0.clear();
+   sb1.clear();
+   sb2.clear();
+
+   timer0.start(); // variant 0 ("native"): stream the vector as one object
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb0 << v;
+   }
+   timer0.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb0.size());
+   sb0.clear();
+
+   timer1.start(); // variant 1 ("elementwise"): stream the three components one by one
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb1 << v[0] << v[1] << v[2];
+   }
+   timer1.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb1.size()); // report the buffer this loop filled (was sb0, already cleared above)
+   sb1.clear();
+
+   timer2.start(); // variant 2 ("memcpy"): reserve space with forward() and copy the raw components
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      auto pos = sb2.forward(sizeof(real_t) * 3);
+      memcpy(pos, v.data(), sizeof(real_t) * 3);
+   }
+   timer2.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb2.size()); // report the buffer this loop filled (was sb0, already cleared above)
+   sb2.clear();
+
+   //auto ptr0 = sb0.ptr();
+   //auto ptr1 = sb1.ptr();
+   //for (auto i = 0; i < numElements; ++i)
+   //{
+   //   WALBERLA_ASSERT_EQUAL(*ptr0, *ptr1);
+   //   ++ptr0;
+   //   ++ptr1;
+   //}
+
+   WALBERLA_LOG_DEVEL("native:      " << timer0.total());
+   WALBERLA_LOG_DEVEL("elementwise: " << timer1.total());
+   WALBERLA_LOG_DEVEL("memcpy:      " << timer2.total());
+
+   return 0;
+}
+
+} // namespace walberla
+
+int main( int argc, char* argv[] ) // process entry point; the benchmark itself is walberla::main
+{
+   walberla::mpi::Environment mpiEnv( argc, argv ); // constructed before the benchmark and alive until main returns
+   WALBERLA_UNUSED(mpiEnv); // never referenced again; the macro presumably silences the unused-variable warning
+
+   return walberla::main( argc, argv );
+}
diff --git a/src/core/math/GenericAABB.h b/src/core/math/GenericAABB.h
index 4664a48a3e821e356488629fbf70fa2b3fbda897..9b321e738f53548dba55094722425937967bef12 100644
--- a/src/core/math/GenericAABB.h
+++ b/src/core/math/GenericAABB.h
@@ -196,7 +196,10 @@ public:
    inline friend mpi::GenericRecvBuffer< ET > & operator>>( mpi::GenericRecvBuffer< ET > & buf, GenericAABB< T > & aabb )
    {
       buf.readDebugMarker( "bb" );
-      buf >> aabb.minCorner_ >> aabb.maxCorner_;
+      static_assert ( std::is_trivially_copyable< GenericAABB< T > >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(GenericAABB< T >));
+      std::memcpy(static_cast<void*>(&aabb), pos, sizeof(GenericAABB< T >)); // void* cast suppresses GCC -Wclass-memaccess, as in the other operator>> overloads
       WALBERLA_ASSERT( aabb.checkInvariant() );
       return buf;
    }
diff --git a/src/core/math/GenericAABB.impl.h b/src/core/math/GenericAABB.impl.h
index 7d24f164f2fbae77cbb715d3e4ac2fb35ba987ff..1fae800a88c6f7bcfb1d7a2b24cc9a30411c8066 100644
--- a/src/core/math/GenericAABB.impl.h
+++ b/src/core/math/GenericAABB.impl.h
@@ -1883,7 +1883,11 @@ template< typename T,    // Element type of SendBuffer
 mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const GenericAABB< VT > & aabb )
 {
    buf.addDebugMarker( "bb" );
-   return buf << aabb.minCorner() << aabb.maxCorner();
+   static_assert ( std::is_trivially_copyable< GenericAABB< VT > >::value,
+                   "type has to be trivially copyable for the memcpy to work correctly" );
+   auto pos = buf.forward(sizeof(GenericAABB< VT >));
+   std::memcpy(pos, &aabb, sizeof(GenericAABB< VT >));
+   return buf;
 }
 
 
diff --git a/src/core/math/Matrix2.h b/src/core/math/Matrix2.h
index 8e0504359bb722f912d159557034c5bdab9773e6..e3d8893333b3e8ba27f59b3565d551379b8fd774 100644
--- a/src/core/math/Matrix2.h
+++ b/src/core/math/Matrix2.h
@@ -958,9 +958,11 @@ template< typename T,    // Element type of SendBuffer
           typename MT >  // Element type of matrix
 mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix2<MT> & m )
 {
-   for(unsigned int i=0; i<4; ++i)
-      buf << m[i];
-
+   buf.addDebugMarker( "m2" );
+   static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
+                   "type has to be trivially copyable for the memcpy to work correctly" );
+   auto pos = buf.forward(sizeof(Matrix2<MT>));
+   std::memcpy(pos, &m, sizeof(Matrix2<MT>));
    return buf;
 }
 
@@ -968,9 +970,12 @@ template< typename T,    // Element type  of RecvBuffer
           typename MT >  // Element type of matrix
 mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix2<MT> & m )
 {
-   for(unsigned int i=0; i<4; ++i)
-      buf >> m[i];
-
+   buf.readDebugMarker( "m2" );
+   static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
+                   "type has to be trivially copyable for the memcpy to work correctly" );
+   auto pos = buf.skip(sizeof(Matrix2<MT>));
+   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+   std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix2<MT>));
    return buf;
 }
 
diff --git a/src/core/math/Matrix3.h b/src/core/math/Matrix3.h
index 311a041200f49c08bb21c4f456c382dc2d0fb75b..88c7818b7926b05747cdaa09578aa8bbc32c6cb1 100644
--- a/src/core/math/Matrix3.h
+++ b/src/core/math/Matrix3.h
@@ -35,6 +35,7 @@
 #include <type_traits>
 
 #include <algorithm>
+#include <array>
 #include <cmath>
 #include <iostream>
 #include <limits>
@@ -174,6 +175,7 @@ public:
    template< typename Other > inline const Vector3<HIGH> solve( const Vector3<Other> &rhs )         const;
                               inline Type                trace()                                    const;
                               inline Type*               data()                                     {return v_;}
+                              inline Type const *        data()                                     const {return v_;}
    //@}
    //*******************************************************************************************************************
 
@@ -246,9 +248,9 @@ private:
     * 6 & 7 & 8 \\
     * \end{array}\right)\f]
    **/
-   Type v_[9] = {Type(1), Type(0), Type(0),
-                 Type(0), Type(1), Type(0),
-                 Type(0), Type(0), Type(1)};
+   std::array<Type, 9> v_ = {{Type(1), Type(0), Type(0),
+                              Type(0), Type(1), Type(0),
+                              Type(0), Type(0), Type(1)}};
    //@}
    //*******************************************************************************************************************
 };
@@ -1769,9 +1771,11 @@ namespace mpi {
                 typename MT >  // Element type of matrix
       mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix3<MT> & m )
       {
-         for(unsigned int i=0; i<9; ++i)
-            buf << m[i];
-
+         buf.addDebugMarker( "m3" );
+         static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
+                         "type has to be trivially copyable for the memcpy to work correctly" );
+         auto pos = buf.forward(sizeof(Matrix3<MT>));
+         std::memcpy(pos, &m, sizeof(Matrix3<MT>));
          return buf;
       }
 
@@ -1779,9 +1783,12 @@ namespace mpi {
                 typename MT >  // Element type of matrix
       mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix3<MT> & m )
       {
-         for(unsigned int i=0; i<9; ++i)
-            buf >> m[i];
-
+         buf.readDebugMarker( "m3" );
+         static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
+                         "type has to be trivially copyable for the memcpy to work correctly" );
+         auto pos = buf.skip(sizeof(Matrix3<MT>));
+         //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+         std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix3<MT>));
          return buf;
       }
 
diff --git a/src/core/math/Quaternion.h b/src/core/math/Quaternion.h
index dd3a4298ab22a0fc814ab65a7e27b90321a5a888..90fd652de353026cab2612aed1be1570d754ea66 100644
--- a/src/core/math/Quaternion.h
+++ b/src/core/math/Quaternion.h
@@ -161,6 +161,8 @@ public:
                               inline void                       rotateZ( Type angle );
                               inline void                       swap( Quaternion& q ) /* throw() */;
                               inline const Vector3<Type>        getEulerAnglesXYZ() const;
+                              inline Type*                      data()                         {return v_;}
+                              inline Type const *               data()                         const {return v_;}
    //@}
    //**********************************************************************************************
 
@@ -1097,7 +1099,10 @@ namespace mpi {
    mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const math::Quaternion<VT> & quat )
    {
       buf.addDebugMarker( "q4" );
-      buf << quat[0] << quat[1] << quat[2] << quat[3];
+      static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.forward(sizeof(math::Quaternion<VT>));
+      std::memcpy(pos, &quat, sizeof(math::Quaternion<VT>));
       return buf;
    }
 
@@ -1106,9 +1111,11 @@ namespace mpi {
    mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, math::Quaternion<VT> & quat )
    {
       buf.readDebugMarker( "q4" );
-      VT tmp1, tmp2, tmp3, tmp4;
-      buf >> tmp1 >> tmp2 >> tmp3 >> tmp4;
-      quat.set(tmp1, tmp2, tmp3, tmp4);
+      static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(math::Quaternion<VT>));
+      //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+      std::memcpy(static_cast<void*>(&quat), pos, sizeof(math::Quaternion<VT>));
       return buf;
    }
 
diff --git a/src/core/math/Vector2.h b/src/core/math/Vector2.h
index c9b9dba4be0d8c5e38fc9296ed037e47ced1559c..b62d58f4651a743e33a0b9c6f0e4f7c860d174dd 100644
--- a/src/core/math/Vector2.h
+++ b/src/core/math/Vector2.h
@@ -160,6 +160,7 @@ public:
    inline Type            sqrLength()                    const;
    inline Vector2<Length> getNormalized()                const;
    inline Type*           data()                         {return v_;}
+   inline Type const *    data()                         const {return v_;}
    //@}
    //*******************************************************************************************************************
 
@@ -1609,7 +1610,10 @@ namespace mpi {
    mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector2<VT> & vec )
    {
       buf.addDebugMarker( "v2" );
-      buf << vec[0] << vec[1];
+      static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.forward(sizeof(Vector2<VT>));
+      std::memcpy(pos, &vec, sizeof(Vector2<VT>));
       return buf;
    }
 
@@ -1618,7 +1622,11 @@ namespace mpi {
    mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector2<VT> & vec )
    {
       buf.readDebugMarker( "v2" );
-      buf >> vec[0] >> vec[1] ;
+      static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(Vector2<VT>));
+      //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+      std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector2<VT>));
       return buf;
    }
 
diff --git a/src/core/math/Vector3.h b/src/core/math/Vector3.h
index c5ec3e7b8ccb7840fc59905c7d6eaa081661f4f2..267e90c38f7b9752820a8397d7729c36134e3c96 100644
--- a/src/core/math/Vector3.h
+++ b/src/core/math/Vector3.h
@@ -166,6 +166,7 @@ public:
    inline Vector3<Length> getNormalizedOrZero()          const;
    inline void            reset();
    inline Type*           data()                         {return v_;}
+   inline Type const *    data()                         const {return v_;}
    //@}
    //*******************************************************************************************************************
 
@@ -1864,7 +1865,10 @@ namespace mpi {
    mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector3<VT> & vec )
    {
       buf.addDebugMarker( "v3" );
-      buf << vec[0] << vec[1] << vec[2];
+      static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.forward(sizeof(Vector3<VT>));
+      std::memcpy(pos, &vec, sizeof(Vector3<VT>));
       return buf;
    }
 
@@ -1873,7 +1877,11 @@ namespace mpi {
    mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector3<VT> & vec )
    {
       buf.readDebugMarker( "v3" );
-      buf >> vec[0] >> vec[1] >> vec[2];
+      static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(Vector3<VT>));
+      //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+      std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector3<VT>));
       return buf;
    }
 
diff --git a/tests/core/math/GenericAABBTest.cpp b/tests/core/math/GenericAABBTest.cpp
index ee6db55766e9528b00de3a7a6bb323ca0677ee3b..f1d386ade803ce23909bf47a5df4f6b1307618d2 100644
--- a/tests/core/math/GenericAABBTest.cpp
+++ b/tests/core/math/GenericAABBTest.cpp
@@ -102,7 +102,7 @@ void testNonEmptyAABB( const GenericAABB< T > & aabb )
    WALBERLA_CHECK_EQUAL( tmpAABB, aabb.getIntersection( intersectingBox ) );
    WALBERLA_CHECK_EQUAL( aabb.getIntersection( intersectingBox ), tmpAABB );
    WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.volume() / T(8) );
-   WALBERLA_CHECK_IDENTICAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
+   WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
 
    intersectingBox.init( aabb.minCorner() + aabb.sizes(), aabb.maxCorner() + aabb.sizes() );
    tmpAABB = aabb;
@@ -231,7 +231,7 @@ void testAnyAABB( const GenericAABB< T > & aabb )
 
    WALBERLA_CHECK( aabb.intersectsClosedInterval( aabb ) );
    WALBERLA_CHECK_EQUAL( aabb.getIntersection( aabb ), aabb );
-   WALBERLA_CHECK_IDENTICAL( aabb.intersectionVolume( aabb ), aabb.volume() );
+   WALBERLA_CHECK_FLOAT_EQUAL( aabb.intersectionVolume( aabb ), aabb.volume() );
 
    WALBERLA_CHECK( aabb.isIdentical( aabb ) );
    WALBERLA_CHECK( aabb.isEqual( aabb ) );