diff --git a/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp b/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
index 68cd3111bacb829897b20913e7d5e299e80d8566..18874901dc71d904af29d874c10b421f7aee094a 100644
--- a/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
+++ b/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
@@ -329,38 +329,34 @@ int main( int argc, char ** argv )
          SNN(*ps, domain);
          if (bBarrier) WALBERLA_MPI_BARRIER();
          tp["SNN"].end();
-
-         //               if( i % 100 == 0 )
-         //               {
-         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "Timestep " << i << " / " << simulationSteps );
-         //                  SNNBytesSent     = SNN.getBytesSent();
-         //                  SNNBytesReceived = SNN.getBytesReceived();
-         //                  SNNSends         = SNN.getNumberOfSends();
-         //                  SNNReceives      = SNN.getNumberOfReceives();
-         //                  RPBytesSent      = RP.getBytesSent();
-         //                  RPBytesReceived  = RP.getBytesReceived();
-         //                  RPSends          = RP.getNumberOfSends();
-         //                  RPReceives       = RP.getNumberOfReceives();
-         //                  walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
-         //                  walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
-         //                  auto cC = walberla::mpi::reduce(contactsChecked, walberla::mpi::SUM);
-         //                  auto cD = walberla::mpi::reduce(contactsDetected, walberla::mpi::SUM);
-         //                  auto cT = walberla::mpi::reduce(contactsTreated, walberla::mpi::SUM);
-         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
-         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
-         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
-         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
-         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
-         //               }
       }
       timer.end();
 
+      SNNBytesSent     = SNN.getBytesSent();
+      SNNBytesReceived = SNN.getBytesReceived();
+      SNNSends         = SNN.getNumberOfSends();
+      SNNReceives      = SNN.getNumberOfReceives();
+      RPBytesSent      = RP.getBytesSent();
+      RPBytesReceived  = RP.getBytesReceived();
+      RPSends          = RP.getNumberOfSends();
+      RPReceives       = RP.getNumberOfReceives();
+      walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
+      walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
+      auto cC = walberla::mpi::reduce(contactsChecked, walberla::mpi::SUM);
+      auto cD = walberla::mpi::reduce(contactsDetected, walberla::mpi::SUM);
+      auto cT = walberla::mpi::reduce(contactsTreated, walberla::mpi::SUM);
+      WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
+      WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
+      WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
+      WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
+      WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
+
       auto timer_reduced = walberla::timing::getReduced(timer, REDUCE_TOTAL, 0);
       double PUpS = 0.0;
       WALBERLA_ROOT_SECTION()
diff --git a/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt b/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
index 7330aa0ddb83f973d47071a4dba1a4c03baed242..a5cd53649d6ac5c011c026346d137dbcb862a20c 100644
--- a/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
+++ b/apps/benchmarks/ProbeVsExtraMessage/CMakeLists.txt
@@ -1,2 +1,7 @@
+waLBerla_add_executable ( NAME PackPerformance 
+                          FILES PackPerformance.cpp
+                          DEPENDS core )
+
 waLBerla_add_executable ( NAME ProbeVsExtraMessage 
+                          FILES ProbeVsExtraMessage.cpp
                           DEPENDS core postprocessing stencil )
diff --git a/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp b/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6479a3f641778ea859e126450c808febcce0d5e1
--- /dev/null
+++ b/apps/benchmarks/ProbeVsExtraMessage/PackPerformance.cpp
@@ -0,0 +1,112 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PackPerformance.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/BufferSystem.h"
+#include "core/mpi/MPIManager.h"
+#include "core/timing/TimingPool.h"
+
+#include <array>
+#include <iostream>
+#include <sstream>
+
+namespace walberla {
+
+int main( int /*argc*/, char ** /*argv*/ )
+{
+   // Times three ways of packing numElements copies of a Vector3<real_t> into an mpi::SendBuffer and logs the totals.
+   const size_t numElements = 100000000;
+   mpi::SendBuffer sb0; // filled with operator<< on the whole vector
+   mpi::SendBuffer sb1; // filled component by component
+   mpi::SendBuffer sb2; // filled with forward() + memcpy
+   Vector3<real_t> v(1,2,3);
+   WcTimer timer0;
+   WcTimer timer1;
+   WcTimer timer2;
+   // Untimed pass over all three buffers. NOTE(review): presumably pre-grows them - confirm clear() keeps the capacity.
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb0 << v;
+      sb1 << v;
+      sb2 << v;
+   }
+
+   WALBERLA_LOG_DEVEL_VAR(sb0.size());
+   sb0.clear();
+   sb1.clear();
+   sb2.clear();
+
+   timer0.start();
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb0 << v;
+   }
+   timer0.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb0.size());
+   sb0.clear();
+
+   timer1.start();
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      sb1 << v[0] << v[1] << v[2];
+   }
+   timer1.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb1.size()); // report and reset the buffer that was just filled
+   sb1.clear();
+
+   timer2.start();
+   for (size_t i = 0; i < numElements; ++i)
+   {
+      auto pos = sb2.forward(sizeof(real_t) * 3);
+      memcpy(pos, v.data(), sizeof(real_t) * 3);
+   }
+   timer2.end();
+
+   WALBERLA_LOG_DEVEL_VAR(sb2.size()); // report and reset the buffer that was just filled
+   sb2.clear();
+
+   //auto ptr0 = sb0.ptr();
+   //auto ptr1 = sb1.ptr();
+   //for (auto i = 0; i < numElements; ++i)
+   //{
+   //   WALBERLA_ASSERT_EQUAL(*ptr0, *ptr1);
+   //   ++ptr0;
+   //   ++ptr1;
+   //}
+
+   WALBERLA_LOG_DEVEL("native:      " << timer0.total());
+   WALBERLA_LOG_DEVEL("elementwise: " << timer1.total());
+   WALBERLA_LOG_DEVEL("memcpy:      " << timer2.total());
+   return 0;
+}
+
+} // namespace walberla
+
+int main( int argc, char* argv[] )
+{
+   // Construct the MPI environment first; NOTE(review): presumably it must outlive the benchmark run - confirm.
+   walberla::mpi::Environment mpiEnv( argc, argv );
+   WALBERLA_UNUSED(mpiEnv);
+   return walberla::main( argc, argv );
+}
diff --git a/apps/benchmarks/ProbeVsExtraMessage/ProbeVsExtraMessage.cpp b/apps/benchmarks/ProbeVsExtraMessage/ProbeVsExtraMessage.cpp
index 7eb39be162734f2daa942c53e335a2a97daca2bc..960bf1e9e7efd662d867ffe5c952cdceec9ee776 100644
--- a/apps/benchmarks/ProbeVsExtraMessage/ProbeVsExtraMessage.cpp
+++ b/apps/benchmarks/ProbeVsExtraMessage/ProbeVsExtraMessage.cpp
@@ -37,6 +37,22 @@
 
 namespace walberla {
 
+// BufferSystem variant that exposes the recvInfos_ member so receive buffers can be unpacked outside the receive loop.
+class CustomBufferSystem : public mpi::BufferSystem
+{
+public:
+   explicit CustomBufferSystem( const MPI_Comm & communicator, int tag = 0 )
+      : mpi::BufferSystem(communicator, tag) {}
+   // Returns the receive buffer stored in recvInfos_ for rank; the lookup is guarded by WALBERLA_CHECK_UNEQUAL.
+   auto& recvBuffer ( walberla::mpi::MPIRank rank )
+   {
+      auto it = recvInfos_.find(rank);
+      WALBERLA_CHECK_UNEQUAL(it, recvInfos_.end(), recvInfos_.size());
+      return it->second.buffer;
+   }
+   auto& getRecvInfos() {return recvInfos_;} // direct access to the inherited receive infos
+};
+
 class MPIInfo
 {
 public:
@@ -44,32 +60,41 @@ public:
 
    int getNeighborRank(const stencil::Direction& dir);
 private:
+   shared_ptr<mpi::MPIManager> manager_;
    Vector3<uint_t> procs_;
    Vector3<bool>   periodicity_;
    Vector3<int>    pos_;
+   std::array<int, 27> ranks_;
 };
 
 MPIInfo::MPIInfo( const Vector3<uint_t>& procs, const Vector3<bool>& periodicity )
-   : procs_(procs)
+   : manager_(mpi::MPIManager::instance())
+   , procs_(procs)
    , periodicity_(periodicity)
 {
-   mpi::MPIManager::instance()->createCartesianComm(procs[0], procs[1], procs[2], periodicity[0], periodicity[1], periodicity[2]);
-   mpi::MPIManager::instance()->cartesianCoord(pos_.data());
+   manager_->createCartesianComm(procs[0], procs[1], procs[2], periodicity[0], periodicity[1], periodicity[2]);
+   manager_->cartesianCoord(pos_.data());
+
+   // Cache the neighbor rank for every D3Q27 direction; -1 means "no neighbor" (the center entry is never assigned below).
+   ranks_.fill(-1);
+   for (auto dirIt = stencil::D3Q27::beginNoCenter(); dirIt != stencil::D3Q27::end(); ++dirIt)
+   {
+      Vector3<int> neighborCoord( pos_[0] + stencil::cx[*dirIt], pos_[1] + stencil::cy[*dirIt], pos_[2] + stencil::cz[*dirIt] );
+      bool outside = false;
+      for (uint_t d = 0; d < 3; ++d)
+      {
+         const int extent = int_c(procs_[d]);
+         if (periodicity_[d]) neighborCoord[d] = (neighborCoord[d] + extent) % extent; // wrap into [0, extent)
+         else if (neighborCoord[d] < 0 || neighborCoord[d] >= extent) outside = true;   // beyond a non-periodic boundary
+      }
+      if (!outside) ranks_[*dirIt] = manager_->cartesianRank(uint_c(neighborCoord[0]), uint_c(neighborCoord[1]), uint_c(neighborCoord[2]));
+   }
 }
 
+inline
 int MPIInfo::getNeighborRank( const stencil::Direction& dir )
 {
-   auto neighborCoord = pos_;
-   neighborCoord[0] += stencil::cx[dir];
-   neighborCoord[1] += stencil::cy[dir];
-   neighborCoord[2] += stencil::cz[dir];
-   if (neighborCoord[0] < 0) return -1;
-   if (neighborCoord[1] < 0) return -1;
-   if (neighborCoord[2] < 0) return -1;
-   if (neighborCoord[0] >= int_c(procs_[0])) return -1;
-   if (neighborCoord[1] >= int_c(procs_[1])) return -1;
-   if (neighborCoord[2] >= int_c(procs_[2])) return -1;
-   return mpi::MPIManager::instance()->cartesianRank(uint_c(neighborCoord[0]), uint_c(neighborCoord[1]), uint_c(neighborCoord[2]));
+   return ranks_[dir]; // table lookup; ranks_ is filled once in the MPIInfo constructor
 }
 
 template <typename Stencil>
@@ -82,15 +107,13 @@ void communicate( MPIInfo& mpiInfo,
    std::vector<char> sendBuf(messageSize);
    std::vector<char> recvBuf(messageSize);
 
-   WcTimer& timer = tp[iProbe ? "IProbe" : "twoMessage"];
-
-   mpi::BufferSystem bs( mpi::MPIManager::instance()->comm() );
+   CustomBufferSystem bs( mpi::MPIManager::instance()->comm() );
    bs.useIProbe(iProbe);
 
    for( uint_t i =0; i < iterations; ++i )
    {
-      timer.start();
-
+      WALBERLA_MPI_BARRIER();
+      tp["pack"].start();
       for (auto dirIt = Stencil::beginNoCenter(); dirIt != Stencil::end(); ++dirIt)
       {
          auto recvRank = mpiInfo.getNeighborRank( *dirIt );
@@ -98,19 +121,29 @@ void communicate( MPIInfo& mpiInfo,
          bs.sendBuffer(recvRank) << sendBuf;
          WALBERLA_ASSERT_EQUAL(bs.sendBuffer(recvRank).size(), messageSize + sizeof(size_t));
       }
+      tp["pack"].end();
 
+      WALBERLA_MPI_BARRIER();
+      tp["communicate"].start();
       bs.setReceiverInfoFromSendBufferState(false, true);
       bs.sendAll();
-
       for( auto it = bs.begin(); it != bs.end(); ++it )
       {
          WALBERLA_ASSERT_EQUAL(it.buffer().size(), messageSize + sizeof(size_t));
-         it.buffer() >> recvBuf;
          WALBERLA_ASSERT_EQUAL(recvBuf.size(), messageSize);
-         WALBERLA_ASSERT(it.buffer().isEmpty());
       }
+      tp["communicate"].end();
 
-      timer.end();
+      WALBERLA_MPI_BARRIER();
+      tp["unpack"].start();
+      auto& recvInfos = bs.getRecvInfos();
+      for (auto recvIt = recvInfos.begin(); recvIt != recvInfos.end(); ++recvIt)
+      {
+         auto& rb = recvIt->second.buffer;
+         rb >> recvBuf;
+         WALBERLA_ASSERT(rb.isEmpty());
+      }
+      tp["unpack"].end();
    }
 }
 
@@ -166,25 +199,27 @@ int main( int argc, char ** argv )
 
    MPIInfo mpiInfo(procs, periodicity);
 
-   WcTimingPool tp;
+   WcTimingPool tp_twoMessages;
+   WcTimingPool tp_probe;
    WALBERLA_MPI_BARRIER();
    if (stencil == "D3Q27")
    {
-      communicate<stencil::D3Q27>(mpiInfo, iterations, messageSize, false, tp);
-      communicate<stencil::D3Q27>(mpiInfo, iterations, messageSize, true, tp);
+      communicate<stencil::D3Q27>(mpiInfo, iterations, messageSize, false, tp_twoMessages);
+      communicate<stencil::D3Q27>(mpiInfo, iterations, messageSize, true, tp_probe);
    } else if (stencil == "D3Q19")
    {
-      communicate<stencil::D3Q19>(mpiInfo, iterations, messageSize, false, tp);
-      communicate<stencil::D3Q19>(mpiInfo, iterations, messageSize, true, tp);
+      communicate<stencil::D3Q19>(mpiInfo, iterations, messageSize, false, tp_twoMessages);
+      communicate<stencil::D3Q19>(mpiInfo, iterations, messageSize, true, tp_probe);
    } else if (stencil == "D3Q7")
    {
-      communicate<stencil::D3Q7>(mpiInfo, iterations, messageSize, false, tp);
-      communicate<stencil::D3Q7>(mpiInfo, iterations, messageSize, true, tp);
+      communicate<stencil::D3Q7>(mpiInfo, iterations, messageSize, false, tp_twoMessages);
+      communicate<stencil::D3Q7>(mpiInfo, iterations, messageSize, true, tp_probe);
    } else
    {
       WALBERLA_ABORT("stencil not supported: " << stencil);
    }
-   WALBERLA_LOG_INFO_ON_ROOT(tp);
+   WALBERLA_LOG_INFO_ON_ROOT(tp_twoMessages);
+   WALBERLA_LOG_INFO_ON_ROOT(tp_probe);
 
    WALBERLA_ROOT_SECTION()
    {
@@ -216,7 +251,8 @@ int main( int argc, char ** argv )
       stringProperties["SLURM_TASKS_PER_NODE"]     = envToString(std::getenv( "SLURM_TASKS_PER_NODE" ));
 
       auto runId = postprocessing::storeRunInSqliteDB( "ProbeVsTwoMessages.sqlite", integerProperties, stringProperties, realProperties );
-      postprocessing::storeTimingPoolInSqliteDB( "ProbeVsTwoMessages.sqlite", runId, tp, "Timings" );
+      postprocessing::storeTimingPoolInSqliteDB( "ProbeVsTwoMessages.sqlite", runId, tp_twoMessages, "twoMessages" );
+      postprocessing::storeTimingPoolInSqliteDB( "ProbeVsTwoMessages.sqlite", runId, tp_probe, "probe" );
    }
 
    return 0;
diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
index 857df4d5f20c4be25a6dacaafd8e3c987a53531f..66497288fd68f32b56467d012b396d78de6cdb5b 100644
--- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
@@ -10,6 +10,7 @@ waLBerla_python_file_generates(UniformGridGPU.py
         UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
         UniformGridGPU_MacroSetter.cpp UniformGridGPU_MacroSetter.h
         UniformGridGPU_MacroGetter.cpp UniformGridGPU_MacroGetter.h
+        UniformGridGPU_Defines.h
         )
 
 foreach(config srt trt mrt smagorinsky entropic )
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index a413df6934496259c62fb21b3295a5e9d64186e8..9e95c73dd6171fcf93765aa07708bae3467d742a 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -35,6 +35,7 @@
 #include "UniformGridGPU_Communication.h"
 #include "UniformGridGPU_MacroSetter.h"
 #include "UniformGridGPU_MacroGetter.h"
+#include "UniformGridGPU_Defines.h"
 
 
 using namespace walberla;
@@ -332,7 +333,10 @@ int main( int argc, char **argv )
                   if ( pythonCallbackResults.isCallable())
                   {
                       pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
-                      pythonCallbackResults.data().exposeValue( "githash", WALBERLA_GIT_SHA1 );
+                      pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
+                      pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
+                      pythonCallbackResults.data().exposeValue( "cse_global", infoCseGlobal );
+                      pythonCallbackResults.data().exposeValue( "cse_pdfs", infoCsePdfs );
                       // Call Python function to report results
                       pythonCallbackResults();
                   }
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
index b95b514b5ed4b29aeacca7f71b38440f5f7d8e0e..d3b9444f7b8c25b0ed5bdb1626bcdd218632e00c 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -55,6 +55,15 @@ options_dict = {
     }
 }
 
+# Template of the generated UniformGridGPU_Defines.h; the {placeholders} are filled in via str.format below.
+info_header = """
+#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
+const char * const infoStencil = "{stencil}";
+const char * const infoConfigName = "{configName}";
+const bool infoCseGlobal = {cse_global};
+const bool infoCsePdfs = {cse_pdfs};
+"""
+
 with CodeGeneration() as ctx:
     accessor = StreamPullTwoFieldsAccessor()
     #accessor = StreamPushTwoFieldsAccessor()
@@ -109,3 +118,12 @@ with CodeGeneration() as ctx:
 
     # communication
     generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
+
+    infoHeaderParams = {
+        'stencil': stencil_str,
+        'q': q,
+        'configName': ctx.config,
+        'cse_global': int(options['optimization']['cse_global']),
+        'cse_pdfs': int(options['optimization']['cse_pdfs']),
+    }
+    ctx.write_file("UniformGridGPU_Defines.h", info_header.format(**infoHeaderParams))
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
index be38e3cb3eb6fcaadaa3caca922776b7b2b6f480..e19413a25583fe1d05e42e6ce9dfb9f61f916214 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
@@ -225,6 +225,7 @@ int main( int argc, char **argv )
         bool useGui = parameters.getParameter<bool>( "useGui", false );
         if( useGui )
         {
+#ifdef WALBERLA_ENABLE_GUI
             cuda::fieldCpy< PdfField_T, cuda::GPUField< real_t > >( blocks, pdfFieldCpuID, pdfFieldGpuID );
             timeLoop.addFuncAfterTimeStep( cuda::fieldCpyFunctor<PdfField_T, cuda::GPUField<real_t> >( blocks, pdfFieldCpuID, pdfFieldGpuID ), "copy to CPU" );
             GUI gui( timeLoop, blocks, argc, argv);
@@ -235,6 +236,9 @@ int main( int argc, char **argv )
                     return nullptr;
                 });
             gui.run();
+#else
+            WALBERLA_ABORT_NO_DEBUG_INFO("Application was built without GUI. Set useGui to false or re-compile with GUI.")
+#endif
         }
         else
         {
@@ -264,7 +268,10 @@ int main( int argc, char **argv )
                     if ( pythonCallbackResults.isCallable())
                     {
                         pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
-                        pythonCallbackResults.data().exposeValue( "githash", WALBERLA_GIT_SHA1 );
+                        pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
+                        pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
+                        pythonCallbackResults.data().exposeValue( "cse_global", infoCseGlobal );
+                        pythonCallbackResults.data().exposeValue( "cse_pdfs", infoCsePdfs );
                         // Call Python function to report results
                         pythonCallbackResults();
                     }
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.py
index 105cb48f4800cbc82ba16908025eb9dd120d8777..08d8b875ac232f76dbf1a065a6e7ee4810c8fc66 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.py
@@ -53,6 +53,16 @@ options_dict = {
     }
 }
 
+
+# Template of the generated UniformGridGPU_AA_Defines.h; the {placeholders} are filled in via str.format below.
+info_header = """
+#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
+const char * const infoStencil = "{stencil}";
+const char * const infoConfigName = "{configName}";
+const bool infoCseGlobal = {cse_global};
+const bool infoCsePdfs = {cse_pdfs};
+"""
+
 with CodeGeneration() as ctx:
     accessors = {
         'Even': AAEvenTimeStepAccessor(),
@@ -105,5 +115,11 @@ with CodeGeneration() as ctx:
     generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPull', update_rules['Odd'], kind='pull', target='gpu')
     generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPush', update_rules['Odd'], kind='push', target='gpu')
 
-    ctx.write_file("UniformGridGPU_AA_Defines.h",
-                   '#include "stencil/D3Q{0}.h"\nusing Stencil_T = walberla::stencil::D3Q{0}; \n '.format(q))
+    infoHeaderParams = {
+        'stencil': stencil_str,
+        'q': q,
+        'configName': ctx.config,
+        'cse_global': int(options['optimization']['cse_global']),
+        'cse_pdfs': int(options['optimization']['cse_pdfs']),
+    }
+    ctx.write_file("UniformGridGPU_AA_Defines.h", info_header.format(**infoHeaderParams))
diff --git a/apps/tutorials/mesa_pd/01_LennardJones.cpp b/apps/tutorials/mesa_pd/01_LennardJones.cpp
index eec37960f8984f3a66cec47fe8a5db57196529f8..6ed34ccabafa8d588270794ca14a13f64b85793b 100644
--- a/apps/tutorials/mesa_pd/01_LennardJones.cpp
+++ b/apps/tutorials/mesa_pd/01_LennardJones.cpp
@@ -89,15 +89,15 @@ int main( int argc, char ** argv )
       WALBERLA_LOG_DEVEL(timestep);
       linkedCells.clear();
       storage->forEachParticle(true, kernel::SelectAll(), ac, ipilc, ac, linkedCells);
-      storage->forEachParticle(true, kernel::SelectAll(), ac, vvPreForce, ac);
+      storage->forEachParticle(true, kernel::SelectLocal(), ac, vvPreForce, ac);
       linkedCells.forEachParticlePairHalf(true, kernel::SelectAll(), ac, lj, ac);
       const real_t coeff = real_t(0.2);
       storage->forEachParticle(true,
-                               kernel::SelectAll(),
+                               kernel::SelectLocal(),
                                ac,
                                [coeff](const size_t idx, auto& access){ access.setForce(idx, -coeff*access.getPosition(idx) + access.getForce(idx)); },
                                ac);
-      storage->forEachParticle(true, kernel::SelectAll(), ac, vvPostForce, ac);
+      storage->forEachParticle(true, kernel::SelectLocal(), ac, vvPostForce, ac);
       vtkWriter->write();
    }
 
diff --git a/python/mesa_pd/templates/mpi/ReduceProperty.templ.h b/python/mesa_pd/templates/mpi/ReduceProperty.templ.h
index 99eab36d7cb3da7a7a9ce78c53624785b20f8951..38f7340854e17934bc5e867757968566393d658a 100644
--- a/python/mesa_pd/templates/mpi/ReduceProperty.templ.h
+++ b/python/mesa_pd/templates/mpi/ReduceProperty.templ.h
@@ -29,6 +29,7 @@
 #include <mesa_pd/data/DataTypes.h>
 #include <mesa_pd/data/Flags.h>
 #include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/mpi/notifications/reset.h>
 
 #include <core/mpi/BufferSystem.h>
 #include <core/logging/Logging.h>
@@ -100,6 +101,7 @@ void ReduceProperty::operator()(data::ParticleStorage& ps) const
          }
 
          sb << Notification( p );
+         reset<Notification>( p );
       } else
       {
          //local particles should receive the property and sum it up
diff --git a/python/mesa_pd/templates/mpi/notifications/ContactHistoryNotification.templ.h b/python/mesa_pd/templates/mpi/notifications/ContactHistoryNotification.templ.h
index 15ab081fdfb77e9102f4ff4cbf4b03323ec4c0c8..2ffca67a2aeecd4d4768ad6c4685bf426652afa7 100644
--- a/python/mesa_pd/templates/mpi/notifications/ContactHistoryNotification.templ.h
+++ b/python/mesa_pd/templates/mpi/notifications/ContactHistoryNotification.templ.h
@@ -28,6 +28,7 @@
 
 #include <mesa_pd/data/ContactHistory.h>
 #include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/mpi/notifications/reset.h>
 
 #include <core/mpi/BufferDataTypeExtensions.h>
 #include <core/mpi/Datatype.h>
@@ -56,6 +57,12 @@ public:
    const data::Particle& p_;
 };
 
+// Resets the reduced property of p: the new contact history is replaced by an empty map. inline: defined in a header.
+template <> inline void reset<ContactHistoryNotification>(data::Particle& p)
+{
+   p.setNewContactHistory(std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>());
+}
+
 void reduce(data::Particle&& p, const ContactHistoryNotification::Parameters& objparam)
 {
    auto& ch = p.getNewContactHistoryRef();
diff --git a/python/mesa_pd/templates/mpi/notifications/ForceTorqueNotification.templ.h b/python/mesa_pd/templates/mpi/notifications/ForceTorqueNotification.templ.h
index f00db9e420a65647933fc69bd5015fe144bcd873..6bfa6412b1a7d5e1d8661c36868f14b76192f69a 100644
--- a/python/mesa_pd/templates/mpi/notifications/ForceTorqueNotification.templ.h
+++ b/python/mesa_pd/templates/mpi/notifications/ForceTorqueNotification.templ.h
@@ -29,6 +29,7 @@
 #include <mesa_pd/data/DataTypes.h>
 #include <mesa_pd/data/ParticleStorage.h>
 #include <mesa_pd/mpi/notifications/NotificationType.h>
+#include <mesa_pd/mpi/notifications/reset.h>
 
 #include <core/mpi/Datatype.h>
 #include <core/mpi/RecvBuffer.h>
@@ -55,6 +56,13 @@ public:
    const data::Particle& p_;
 };
 
+// Resets the reduced properties of p: force and torque are set to zero. inline: defined in a header.
+template <> inline void reset<ForceTorqueNotification>(data::Particle& p)
+{
+   p.setForce(  Vec3(real_t(0)) );
+   p.setTorque( Vec3(real_t(0)) );
+}
+
 void reduce(data::Particle&& p, const ForceTorqueNotification::Parameters& objparam)
 {
    p.getForceRef()  += objparam.force_;
diff --git a/src/core/math/GenericAABB.h b/src/core/math/GenericAABB.h
index 4664a48a3e821e356488629fbf70fa2b3fbda897..9b321e738f53548dba55094722425937967bef12 100644
--- a/src/core/math/GenericAABB.h
+++ b/src/core/math/GenericAABB.h
@@ -196,7 +196,10 @@ public:
    inline friend mpi::GenericRecvBuffer< ET > & operator>>( mpi::GenericRecvBuffer< ET > & buf, GenericAABB< T > & aabb )
    {
       buf.readDebugMarker( "bb" );
-      buf >> aabb.minCorner_ >> aabb.maxCorner_;
+      static_assert ( std::is_trivially_copyable< GenericAABB< T > >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(GenericAABB< T >));
+      std::memcpy(&aabb, pos, sizeof(GenericAABB< T >));
       WALBERLA_ASSERT( aabb.checkInvariant() );
       return buf;
    }
diff --git a/src/core/math/GenericAABB.impl.h b/src/core/math/GenericAABB.impl.h
index 7d24f164f2fbae77cbb715d3e4ac2fb35ba987ff..1fae800a88c6f7bcfb1d7a2b24cc9a30411c8066 100644
--- a/src/core/math/GenericAABB.impl.h
+++ b/src/core/math/GenericAABB.impl.h
@@ -1883,7 +1883,11 @@ template< typename T,    // Element type of SendBuffer
 mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const GenericAABB< VT > & aabb )
 {
    buf.addDebugMarker( "bb" );
-   return buf << aabb.minCorner() << aabb.maxCorner();
+   static_assert ( std::is_trivially_copyable< GenericAABB< VT > >::value,
+                   "type has to be trivially copyable for the memcpy to work correctly" );
+   auto pos = buf.forward(sizeof(GenericAABB< VT >));
+   std::memcpy(pos, &aabb, sizeof(GenericAABB< VT >));
+   return buf;
 }
 
 
diff --git a/src/core/math/Matrix2.h b/src/core/math/Matrix2.h
index 8e0504359bb722f912d159557034c5bdab9773e6..e3d8893333b3e8ba27f59b3565d551379b8fd774 100644
--- a/src/core/math/Matrix2.h
+++ b/src/core/math/Matrix2.h
@@ -958,9 +958,11 @@ template< typename T,    // Element type of SendBuffer
           typename MT >  // Element type of matrix
 mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix2<MT> & m )
 {
-   for(unsigned int i=0; i<4; ++i)
-      buf << m[i];
-
+   buf.addDebugMarker( "m2" );
+   static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
+                   "type has to be trivially copyable for the memcpy to work correctly" );
+   auto pos = buf.forward(sizeof(Matrix2<MT>));
+   std::memcpy(pos, &m, sizeof(Matrix2<MT>));
    return buf;
 }
 
@@ -968,9 +970,12 @@ template< typename T,    // Element type  of RecvBuffer
           typename MT >  // Element type of matrix
 mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix2<MT> & m )
 {
-   for(unsigned int i=0; i<4; ++i)
-      buf >> m[i];
-
+   buf.readDebugMarker( "m2" );
+   static_assert ( std::is_trivially_copyable< Matrix2<MT> >::value,
+                   "type has to be trivially copyable for the memcpy to work correctly" );
+   auto pos = buf.skip(sizeof(Matrix2<MT>));
+   //suppress https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+   std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix2<MT>));
    return buf;
 }
 
diff --git a/src/core/math/Matrix3.h b/src/core/math/Matrix3.h
index 311a041200f49c08bb21c4f456c382dc2d0fb75b..88c7818b7926b05747cdaa09578aa8bbc32c6cb1 100644
--- a/src/core/math/Matrix3.h
+++ b/src/core/math/Matrix3.h
@@ -35,6 +35,7 @@
 #include <type_traits>
 
 #include <algorithm>
+#include <array>
 #include <cmath>
 #include <iostream>
 #include <limits>
@@ -174,6 +175,7 @@ public:
    template< typename Other > inline const Vector3<HIGH> solve( const Vector3<Other> &rhs )         const;
                               inline Type                trace()                                    const;
                               inline Type*               data()                                     {return v_;}
+                              inline Type const *        data()                                     const {return v_;}
    //@}
    //*******************************************************************************************************************
 
@@ -246,9 +248,9 @@ private:
     * 6 & 7 & 8 \\
     * \end{array}\right)\f]
    **/
-   Type v_[9] = {Type(1), Type(0), Type(0),
-                 Type(0), Type(1), Type(0),
-                 Type(0), Type(0), Type(1)};
+   std::array<Type, 9> v_ = {{Type(1), Type(0), Type(0),
+                              Type(0), Type(1), Type(0),
+                              Type(0), Type(0), Type(1)}};
    //@}
    //*******************************************************************************************************************
 };
@@ -1769,9 +1771,11 @@ namespace mpi {
                 typename MT >  // Element type of matrix
       mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Matrix3<MT> & m )
       {
-         for(unsigned int i=0; i<9; ++i)
-            buf << m[i];
-
+         buf.addDebugMarker( "m3" ); // same tag that the Matrix3 operator>> passes to readDebugMarker
+         static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
+                         "type has to be trivially copyable for the memcpy to work correctly" );
+         auto pos = buf.forward(sizeof(Matrix3<MT>)); // NOTE(review): presumably reserves that many bytes and returns a pointer to them - confirm against GenericSendBuffer::forward
+         std::memcpy(pos, &m, sizeof(Matrix3<MT>)); // copies the whole object byte-wise instead of streaming the nine elements one by one
          return buf;
       }
 
@@ -1779,9 +1783,12 @@ namespace mpi {
                 typename MT >  // Element type of matrix
       mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Matrix3<MT> & m )
       {
-         for(unsigned int i=0; i<9; ++i)
-            buf >> m[i];
-
+         buf.readDebugMarker( "m3" ); // same tag that the Matrix3 operator<< passes to addDebugMarker
+         static_assert ( std::is_trivially_copyable< Matrix3<MT> >::value,
+                         "type has to be trivially copyable for the memcpy to work correctly" );
+         auto pos = buf.skip(sizeof(Matrix3<MT>)); // NOTE(review): presumably returns the current read position and advances past the object - confirm against GenericRecvBuffer::skip
+         // the static_cast<void*> below suppresses GCC's -Wclass-memaccess, see https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+         std::memcpy(static_cast<void*>(&m), pos, sizeof(Matrix3<MT>)); // overwrites m byte-wise with the received bytes
          return buf;
       }
 
diff --git a/src/core/math/Quaternion.h b/src/core/math/Quaternion.h
index dd3a4298ab22a0fc814ab65a7e27b90321a5a888..90fd652de353026cab2612aed1be1570d754ea66 100644
--- a/src/core/math/Quaternion.h
+++ b/src/core/math/Quaternion.h
@@ -161,6 +161,8 @@ public:
                               inline void                       rotateZ( Type angle );
                               inline void                       swap( Quaternion& q ) /* throw() */;
                               inline const Vector3<Type>        getEulerAnglesXYZ() const;
+                              inline Type*                      data()                         {return v_;} // mutable pointer access to v_
+                              inline Type const *               data()                         const {return v_;} // read-only overload for const quaternions
    //@}
    //**********************************************************************************************
 
@@ -1097,7 +1099,10 @@ namespace mpi {
    mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const math::Quaternion<VT> & quat )
    {
       buf.addDebugMarker( "q4" );
-      buf << quat[0] << quat[1] << quat[2] << quat[3];
+      static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.forward(sizeof(math::Quaternion<VT>)); // NOTE(review): presumably reserves that many bytes and returns a pointer to them - confirm against GenericSendBuffer::forward
+      std::memcpy(pos, &quat, sizeof(math::Quaternion<VT>)); // copies the whole object byte-wise instead of streaming the four components
       return buf;
    }
 
@@ -1106,9 +1111,11 @@ namespace mpi {
    mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, math::Quaternion<VT> & quat )
    {
       buf.readDebugMarker( "q4" );
-      VT tmp1, tmp2, tmp3, tmp4;
-      buf >> tmp1 >> tmp2 >> tmp3 >> tmp4;
-      quat.set(tmp1, tmp2, tmp3, tmp4);
+      static_assert ( std::is_trivially_copyable< math::Quaternion<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(math::Quaternion<VT>)); // NOTE(review): presumably returns the current read position and advances past the object - confirm against GenericRecvBuffer::skip
+      // the static_cast<void*> below suppresses GCC's -Wclass-memaccess, see https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+      std::memcpy(static_cast<void*>(&quat), pos, sizeof(math::Quaternion<VT>)); // writes the bytes directly into quat; unlike the removed code this no longer goes through quat.set()
       return buf;
    }
 
diff --git a/src/core/math/Vector2.h b/src/core/math/Vector2.h
index c9b9dba4be0d8c5e38fc9296ed037e47ced1559c..b62d58f4651a743e33a0b9c6f0e4f7c860d174dd 100644
--- a/src/core/math/Vector2.h
+++ b/src/core/math/Vector2.h
@@ -160,6 +160,7 @@ public:
    inline Type            sqrLength()                    const;
    inline Vector2<Length> getNormalized()                const;
    inline Type*           data()                         {return v_;}
+   inline Type const *    data()                         const {return v_;} // read-only overload of data() for const vectors
    //@}
    //*******************************************************************************************************************
 
@@ -1609,7 +1610,10 @@ namespace mpi {
    mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector2<VT> & vec )
    {
       buf.addDebugMarker( "v2" );
-      buf << vec[0] << vec[1];
+      static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.forward(sizeof(Vector2<VT>)); // NOTE(review): presumably reserves that many bytes and returns a pointer to them - confirm against GenericSendBuffer::forward
+      std::memcpy(pos, &vec, sizeof(Vector2<VT>)); // copies the whole object byte-wise instead of streaming the two components
       return buf;
    }
 
@@ -1618,7 +1622,11 @@ namespace mpi {
    mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector2<VT> & vec )
    {
       buf.readDebugMarker( "v2" );
-      buf >> vec[0] >> vec[1] ;
+      static_assert ( std::is_trivially_copyable< Vector2<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(Vector2<VT>)); // NOTE(review): presumably returns the current read position and advances past the object - confirm against GenericRecvBuffer::skip
+      // the static_cast<void*> below suppresses GCC's -Wclass-memaccess, see https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+      std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector2<VT>)); // overwrites vec byte-wise with the received bytes
       return buf;
    }
 
diff --git a/src/core/math/Vector3.h b/src/core/math/Vector3.h
index c5ec3e7b8ccb7840fc59905c7d6eaa081661f4f2..267e90c38f7b9752820a8397d7729c36134e3c96 100644
--- a/src/core/math/Vector3.h
+++ b/src/core/math/Vector3.h
@@ -166,6 +166,7 @@ public:
    inline Vector3<Length> getNormalizedOrZero()          const;
    inline void            reset();
    inline Type*           data()                         {return v_;}
+   inline Type const *    data()                         const {return v_;} // read-only overload of data() for const vectors
    //@}
    //*******************************************************************************************************************
 
@@ -1864,7 +1865,10 @@ namespace mpi {
    mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const Vector3<VT> & vec )
    {
       buf.addDebugMarker( "v3" );
-      buf << vec[0] << vec[1] << vec[2];
+      static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.forward(sizeof(Vector3<VT>)); // NOTE(review): presumably reserves that many bytes and returns a pointer to them - confirm against GenericSendBuffer::forward
+      std::memcpy(pos, &vec, sizeof(Vector3<VT>)); // copies the whole object byte-wise instead of streaming the three components
       return buf;
    }
 
@@ -1873,7 +1877,11 @@ namespace mpi {
    mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, Vector3<VT> & vec )
    {
       buf.readDebugMarker( "v3" );
-      buf >> vec[0] >> vec[1] >> vec[2];
+      static_assert ( std::is_trivially_copyable< Vector3<VT> >::value,
+                      "type has to be trivially copyable for the memcpy to work correctly" );
+      auto pos = buf.skip(sizeof(Vector3<VT>)); // NOTE(review): presumably returns the current read position and advances past the object - confirm against GenericRecvBuffer::skip
+      // the static_cast<void*> below suppresses GCC's -Wclass-memaccess, see https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html#index-Wclass-memaccess
+      std::memcpy(static_cast<void*>(&vec), pos, sizeof(Vector3<VT>)); // overwrites vec byte-wise with the received bytes
       return buf;
    }
 
diff --git a/src/core/mpi/BufferSystem.impl.h b/src/core/mpi/BufferSystem.impl.h
index 8a4185dd5de991eb85137ed5855ea53104773c1c..f3d7539865e2d8894cda86492f67463006fe0513 100644
--- a/src/core/mpi/BufferSystem.impl.h
+++ b/src/core/mpi/BufferSystem.impl.h
@@ -417,6 +417,11 @@ void GenericBufferSystem<Rb, Sb>::send( MPIRank rank )
 template< typename Rb, typename Sb>
 void GenericBufferSystem<Rb, Sb>::startCommunication()
 {
+   // Clear receive buffers
+   for( auto iter = recvInfos_.begin(); iter != recvInfos_.end(); ++iter )  {
+      iter->second.buffer.clear();
+   }
+
    const auto tag = currentComm_->getTag();
    WALBERLA_CHECK_EQUAL(activeTags_.find(tag), activeTags_.end(),
                         "Another communication with the same MPI tag is currently in progress.");
@@ -457,11 +462,6 @@ void GenericBufferSystem<Rb, Sb>::endCommunication()
       iter->second.buffer.clear();
    }
 
-   // Clear receive buffers
-   for( auto iter = recvInfos_.begin(); iter != recvInfos_.end(); ++iter )  {
-      iter->second.buffer.clear();
-   }
-
 
    if( !sizeChangesEverytime_ )
       setCommunicationType( true );
diff --git a/src/mesa_pd/mpi/ReduceProperty.h b/src/mesa_pd/mpi/ReduceProperty.h
index 22ca078f13da4dbf19e0ad6b12b1c556b532326a..7f55c334973729778ca7dd87ada38ed175a522e7 100644
--- a/src/mesa_pd/mpi/ReduceProperty.h
+++ b/src/mesa_pd/mpi/ReduceProperty.h
@@ -29,6 +29,7 @@
 #include <mesa_pd/data/DataTypes.h>
 #include <mesa_pd/data/Flags.h>
 #include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/mpi/notifications/reset.h>
 
 #include <core/mpi/BufferSystem.h>
 #include <core/logging/Logging.h>
@@ -100,6 +101,7 @@ void ReduceProperty::operator()(data::ParticleStorage& ps) const
          }
 
          sb << Notification( p );
+         reset<Notification>( p );
       } else
       {
          //local particles should receive the property and sum it up
diff --git a/src/mesa_pd/mpi/notifications/ContactHistoryNotification.h b/src/mesa_pd/mpi/notifications/ContactHistoryNotification.h
index c306f6c8977d174203aaea6f1042731ccdf561c1..b177f313c61d1c522a116c83781109275f2c0c07 100644
--- a/src/mesa_pd/mpi/notifications/ContactHistoryNotification.h
+++ b/src/mesa_pd/mpi/notifications/ContactHistoryNotification.h
@@ -28,6 +28,7 @@
 
 #include <mesa_pd/data/ContactHistory.h>
 #include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/mpi/notifications/reset.h>
 
 #include <core/mpi/BufferDataTypeExtensions.h>
 #include <core/mpi/Datatype.h>
@@ -56,6 +57,12 @@ public:
    const data::Particle& p_;
 };
 
+template <> // explicit specialization of reset<> for ContactHistoryNotification
+inline void reset<ContactHistoryNotification>(data::Particle& p) // inline: defined in a header, so it must be safe to include from several translation units
+{
+   p.setNewContactHistory(std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>()); // replace the particle's new contact history with an empty map
+}
+
 void reduce(data::Particle&& p, const ContactHistoryNotification::Parameters& objparam)
 {
    auto& ch = p.getNewContactHistoryRef();
diff --git a/src/mesa_pd/mpi/notifications/ForceTorqueNotification.h b/src/mesa_pd/mpi/notifications/ForceTorqueNotification.h
index c46f58724790f804b51b44fe966f5616efdaac76..d19d0d084635528194d048820d9d70785d03940a 100644
--- a/src/mesa_pd/mpi/notifications/ForceTorqueNotification.h
+++ b/src/mesa_pd/mpi/notifications/ForceTorqueNotification.h
@@ -29,6 +29,7 @@
 #include <mesa_pd/data/DataTypes.h>
 #include <mesa_pd/data/ParticleStorage.h>
 #include <mesa_pd/mpi/notifications/NotificationType.h>
+#include <mesa_pd/mpi/notifications/reset.h>
 
 #include <core/mpi/Datatype.h>
 #include <core/mpi/RecvBuffer.h>
@@ -55,6 +56,13 @@ public:
    const data::Particle& p_;
 };
 
+template <> // explicit specialization of reset<> for ForceTorqueNotification
+inline void reset<ForceTorqueNotification>(data::Particle& p) // inline: defined in a header, so it must be safe to include from several translation units
+{
+   p.setForce(  Vec3(real_t(0)) ); // zero the force on p
+   p.setTorque( Vec3(real_t(0)) ); // zero the torque on p
+}
+
 void reduce(data::Particle&& p, const ForceTorqueNotification::Parameters& objparam)
 {
    p.getForceRef()  += objparam.force_;
diff --git a/src/mesa_pd/mpi/notifications/reset.h b/src/mesa_pd/mpi/notifications/reset.h
new file mode 100644
index 0000000000000000000000000000000000000000..24bab4533bacbfba9695fb4927136112da31b47e
--- /dev/null
+++ b/src/mesa_pd/mpi/notifications/reset.h
@@ -0,0 +1,35 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file reset.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/ParticleStorage.h>
+
+namespace walberla {
+namespace mesa_pd {
+
+template <class Notification>
+void reset(data::Particle& /*p*/) // generic fallback; notification types are expected to provide an explicit specialization
+{
+   WALBERLA_ABORT("not implemented!"); // only reached when no specialization exists for Notification
+}
+
+} // mesa_pd
+} // walberla
diff --git a/tests/core/math/GenericAABBTest.cpp b/tests/core/math/GenericAABBTest.cpp
index ee6db55766e9528b00de3a7a6bb323ca0677ee3b..f1d386ade803ce23909bf47a5df4f6b1307618d2 100644
--- a/tests/core/math/GenericAABBTest.cpp
+++ b/tests/core/math/GenericAABBTest.cpp
@@ -102,7 +102,7 @@ void testNonEmptyAABB( const GenericAABB< T > & aabb )
    WALBERLA_CHECK_EQUAL( tmpAABB, aabb.getIntersection( intersectingBox ) );
    WALBERLA_CHECK_EQUAL( aabb.getIntersection( intersectingBox ), tmpAABB );
    WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.volume() / T(8) );
-   WALBERLA_CHECK_IDENTICAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
+   WALBERLA_CHECK_FLOAT_EQUAL( tmpAABB.volume(), aabb.intersectionVolume( intersectingBox ) );
 
    intersectingBox.init( aabb.minCorner() + aabb.sizes(), aabb.maxCorner() + aabb.sizes() );
    tmpAABB = aabb;
@@ -231,7 +231,7 @@ void testAnyAABB( const GenericAABB< T > & aabb )
 
    WALBERLA_CHECK( aabb.intersectsClosedInterval( aabb ) );
    WALBERLA_CHECK_EQUAL( aabb.getIntersection( aabb ), aabb );
-   WALBERLA_CHECK_IDENTICAL( aabb.intersectionVolume( aabb ), aabb.volume() );
+   WALBERLA_CHECK_FLOAT_EQUAL( aabb.intersectionVolume( aabb ), aabb.volume() );
 
    WALBERLA_CHECK( aabb.isIdentical( aabb ) );
    WALBERLA_CHECK( aabb.isEqual( aabb ) );
diff --git a/tests/mesa_pd/mpi/ReduceProperty.cpp b/tests/mesa_pd/mpi/ReduceProperty.cpp
index 300fc5dc106f938b619b810ddba72e8a7738820f..50c5b77836e842f6ba88cb4695c0bccf87c5389d 100644
--- a/tests/mesa_pd/mpi/ReduceProperty.cpp
+++ b/tests/mesa_pd/mpi/ReduceProperty.cpp
@@ -95,7 +95,7 @@ void main( int argc, char ** argv )
       WALBERLA_CHECK_FLOAT_EQUAL( pIt->getForce(), Vec3(real_t(28)) );
    } else
    {
-      WALBERLA_CHECK_FLOAT_EQUAL( pIt->getForce(), Vec3(real_t(walberla::mpi::MPIManager::instance()->rank())) );
+      WALBERLA_CHECK_FLOAT_EQUAL( pIt->getForce(), Vec3(0) );
    }
 }