diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 02a864055a0535e3148ffa222750b760e356da05..ff8c9388f2638f862deb1c595eb2fe254b1ba772 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory( CouetteFlow )
+add_subdirectory( ForcesOnSphereNearPlaneInShearFlow )
 add_subdirectory( NonUniformGrid )
 add_subdirectory( MotionSingleHeavySphere )
 add_subdirectory( PoiseuilleChannel )
diff --git a/apps/benchmarks/ForcesOnSphereNearPlaneInShearFlow/CMakeLists.txt b/apps/benchmarks/ForcesOnSphereNearPlaneInShearFlow/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d2c67b31abd4ce5a1c17ac69a6dde65ad1ae77ef
--- /dev/null
+++ b/apps/benchmarks/ForcesOnSphereNearPlaneInShearFlow/CMakeLists.txt
@@ -0,0 +1,2 @@
+waLBerla_add_executable ( NAME ForcesOnSphereNearPlaneInShearFlow
+                          DEPENDS blockforest boundary core domain_decomposition field lbm pe pe_coupling postprocessing timeloop vtk )
\ No newline at end of file
diff --git a/apps/benchmarks/ForcesOnSphereNearPlaneInShearFlow/ForcesOnSphereNearPlaneInShearFlow.cpp b/apps/benchmarks/ForcesOnSphereNearPlaneInShearFlow/ForcesOnSphereNearPlaneInShearFlow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c14c38922132ea4da7d0940646583e0fc2799246
--- /dev/null
+++ b/apps/benchmarks/ForcesOnSphereNearPlaneInShearFlow/ForcesOnSphereNearPlaneInShearFlow.cpp
@@ -0,0 +1,811 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ForcesOnSphereNearPlaneInShearFlow.cpp
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "boundary/all.h"
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/SharedFunctor.h"
+#include "core/debug/Debug.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/all.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#include "domain_decomposition/SharedSweep.h"
+
+#include "field/AddToStorage.h"
+#include "field/StabilityChecker.h"
+#include "field/communication/PackInfo.h"
+
+#include "lbm/boundary/all.h"
+#include "lbm/communication/PdfFieldPackInfo.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/field/PdfField.h"
+#include "lbm/lattice_model/D3Q19.h"
+#include "lbm/refinement/all.h"
+#include "lbm/sweeps/CellwiseSweep.h"
+#include "lbm/sweeps/SweepWrappers.h"
+
+#include "pe/basic.h"
+#include "pe/vtk/BodyVtkOutput.h"
+#include "pe/vtk/SphereVtkOutput.h"
+#include "pe/cr/ICR.h"
+#include "pe/Types.h"
+
+#include "pe_coupling/mapping/all.h"
+#include "pe_coupling/momentum_exchange_method/all.h"
+#include "pe_coupling/utility/all.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "vtk/all.h"
+#include "field/vtk/all.h"
+#include "lbm/vtk/all.h"
+
+namespace forces_on_sphere_near_plane_in_shear_flow
+{
+
+///////////
+// USING //
+///////////
+
+using namespace walberla;
+using walberla::uint_t;
+
+//////////////
+// TYPEDEFS //
+//////////////
+
+// PDF field, flag field & body field
+typedef lbm::D3Q19< lbm::collision_model::TRT, false >  LatticeModel_T;
+typedef LatticeModel_T::Stencil          Stencil_T;
+typedef lbm::PdfField< LatticeModel_T > PdfField_T;
+
+typedef walberla::uint8_t                 flag_t;
+typedef FlagField< flag_t >               FlagField_T;
+typedef GhostLayerField< pe::BodyID, 1 >  BodyField_T;
+
+const uint_t FieldGhostLayers = 4;
+
+// boundary handling
+typedef pe_coupling::SimpleBB< LatticeModel_T, FlagField_T > MO_SBB_T;
+typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MO_CLI_T;
+
+typedef boost::tuples::tuple< MO_SBB_T, MO_CLI_T > BoundaryConditions_T;
+typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
+
+typedef boost::tuple< pe::Sphere, pe::Plane > BodyTypeTuple;
+
+///////////
+// FLAGS //
+///////////
+
+const FlagUID Fluid_Flag( "fluid" );
+const FlagUID MO_SBB_Flag( "moving obstacle SBB" );
+const FlagUID MO_CLI_Flag( "moving obstacle CLI" );
+
+
+/////////////////////
+// BLOCK STRUCTURE //
+/////////////////////
+
+static void refinementSelection( SetupBlockForest& forest, uint_t levels, AABB refinementBox )
+{
+   real_t dx = real_t(1); // dx on finest level
+   for( auto block = forest.begin(); block != forest.end(); ++block )
+   {
+      uint_t blockLevel = block->getLevel();
+      uint_t levelScalingFactor = ( uint_t(1) << (levels - uint_t(1) - blockLevel) );
+      real_t dxOnLevel = dx * real_c(levelScalingFactor);
+      AABB blockAABB = block->getAABB();
+
+      // extend block AABB by ghostlayers
+      AABB extendedBlockAABB = blockAABB.getExtended( dxOnLevel * real_c(FieldGhostLayers) );
+
+      if( extendedBlockAABB.intersects( refinementBox ) )
+         if( blockLevel < ( levels - uint_t(1) ) )
+            block->setMarker( true );
+   }
+}
+
+static void workloadAndMemoryAssignment( SetupBlockForest& forest )
+{
+   for( auto block = forest.begin(); block != forest.end(); ++block )
+   {
+      block->setWorkload( numeric_cast< workload_t >( uint_t(1) << block->getLevel() ) );
+      block->setMemory( numeric_cast< memory_t >(1) );
+   }
+}
+
+static shared_ptr< StructuredBlockForest > createBlockStructure( const AABB & domainAABB, Vector3<uint_t> blockSizeInCells,
+                                                                 uint_t numberOfLevels, real_t diameter, Vector3<real_t> initialPosition,
+                                                                 bool useLargeRefinementRegion,
+                                                                 bool keepGlobalBlockInformation = false )
+{
+   SetupBlockForest sforest;
+
+   Vector3<uint_t> numberOfFineBlocksPerDirection( uint_c(domainAABB.size(0)) / blockSizeInCells[0],
+                                                   uint_c(domainAABB.size(1)) / blockSizeInCells[1],
+                                                   uint_c(domainAABB.size(2)) / blockSizeInCells[2] );
+
+   for(uint_t i = 0; i < 3; ++i )
+   {
+      WALBERLA_CHECK_EQUAL( numberOfFineBlocksPerDirection[i] * blockSizeInCells[i], uint_c(domainAABB.size(i)),
+                            "Domain can not be decomposed in direction " << i << " into fine blocks of size " << blockSizeInCells[i] );
+   }
+
+   uint_t levelScalingFactor = ( uint_t(1) << ( numberOfLevels - uint_t(1) ) );
+   Vector3<uint_t> numberOfCoarseBlocksPerDirection( numberOfFineBlocksPerDirection / levelScalingFactor );
+
+   for(uint_t i = 0; i < 3; ++i )
+   {
+      WALBERLA_CHECK_EQUAL(numberOfCoarseBlocksPerDirection[i] * levelScalingFactor, numberOfFineBlocksPerDirection[i],
+                            "Domain can not be refined in direction " << i << " according to the specified number of levels!" );
+   }
+
+   AABB refinementBox;
+   if( useLargeRefinementRegion ) {
+      // refinement area is the complete area along the bottom plane, containing the sphere
+      // this avoids refinement borders in flow direction
+      refinementBox = AABB(domainAABB.xMin(), domainAABB.yMin(), domainAABB.zMin(),
+                           domainAABB.xMax(), domainAABB.yMax(), initialPosition[2] + real_t(0.5) * diameter);
+   } else{
+      // refinement area is just around the sphere
+      refinementBox = AABB(initialPosition[0] - real_t(0.5) * diameter, initialPosition[1] - real_t(0.5) * diameter, domainAABB.zMin(),
+                           initialPosition[0] + real_t(0.5) * diameter, initialPosition[1] + real_t(0.5) * diameter, initialPosition[2] + real_t(0.5) * diameter);
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT(" - refinement box: " << refinementBox);
+
+   sforest.addRefinementSelectionFunction( boost::bind( refinementSelection, _1, numberOfLevels, refinementBox ) );
+   sforest.addWorkloadMemorySUIDAssignmentFunction( workloadAndMemoryAssignment );
+
+   sforest.init( domainAABB, numberOfCoarseBlocksPerDirection[0], numberOfCoarseBlocksPerDirection[1], numberOfCoarseBlocksPerDirection[2], true, true, false );
+
+   // calculate process distribution
+   const memory_t memoryLimit = math::Limits< memory_t >::inf();
+
+   sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(true), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true );
+
+   WALBERLA_LOG_INFO_ON_ROOT( sforest );
+
+   MPIManager::instance()->useWorldComm();
+
+   // create StructuredBlockForest (encapsulates a newly created BlockForest)
+   shared_ptr< StructuredBlockForest > sbf =
+         make_shared< StructuredBlockForest >( make_shared< BlockForest >( uint_c( MPIManager::instance()->rank() ), sforest, keepGlobalBlockInformation ),
+                                               blockSizeInCells[0], blockSizeInCells[1], blockSizeInCells[2]);
+   sbf->createCellBoundingBoxes();
+
+   return sbf;
+}
+
+/////////////////////////////////////
+// BOUNDARY HANDLING CUSTOMIZATION //
+/////////////////////////////////////
+class MyBoundaryHandling
+{
+public:
+
+   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID ) :
+      flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID ) {}
+
+   BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const;
+
+private:
+
+   const BlockDataID flagFieldID_;
+   const BlockDataID pdfFieldID_;
+   const BlockDataID bodyFieldID_;
+
+
+}; // class MyBoundaryHandling
+
+
+BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const StructuredBlockStorage * const storage ) const
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( block );
+   WALBERLA_ASSERT_NOT_NULLPTR( storage );
+
+   FlagField_T * flagField       = block->getData< FlagField_T >( flagFieldID_ );
+   PdfField_T *  pdfField        = block->getData< PdfField_T > ( pdfFieldID_ );
+   BodyField_T * bodyField       = block->getData< BodyField_T >( bodyFieldID_ );
+
+   const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
+
+   BoundaryHandling_T * handling = new BoundaryHandling_T( "moving obstacle boundary handling", flagField, fluid,
+         boost::tuples::make_tuple( MO_SBB_T( "MO_SBB", MO_SBB_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ),
+                                    MO_CLI_T( "MO_CLI", MO_CLI_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ) );
+
+   // boundary conditions are set by mapping the (moving) planes into the domain
+
+   handling->fillWithDomain( FieldGhostLayers );
+
+   return handling;
+}
+//*******************************************************************************************************************
+
+
+class SpherePropertyLogger
+{
+public:
+   SpherePropertyLogger( const shared_ptr< StructuredBlockStorage > & blocks,
+                         const BlockDataID & bodyStorageID,
+                         const std::string & fileName, bool fileIO,
+                         real_t dragNormalizationFactor, real_t liftNormalizationFactor, real_t physicalTimeScale ) :
+      blocks_( blocks ), bodyStorageID_( bodyStorageID ),
+      fileName_( fileName ), fileIO_(fileIO),
+      dragNormalizationFactor_( dragNormalizationFactor ), liftNormalizationFactor_( liftNormalizationFactor ),
+      physicalTimeScale_( physicalTimeScale ),
+      counter_( 0 )
+   {
+      if ( fileIO_ )
+      {
+         WALBERLA_ROOT_SECTION()
+         {
+            std::ofstream file;
+            file.open( fileName_.c_str() );
+            file << "#\t Cd\t Cl\t fX\t fY\t fZ\t tX\t tY\t tZ\n";
+            file.close();
+         }
+      }
+   }
+
+   void operator()()
+   {
+
+      Vector3<real_t> force(real_t(0));
+      Vector3<real_t> torque(real_t(0));
+
+      for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            force += bodyIt->getForce();
+            torque += bodyIt->getTorque();
+         }
+      }
+
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace( force[0], mpi::SUM );
+         mpi::allReduceInplace( force[1], mpi::SUM );
+         mpi::allReduceInplace( force[2], mpi::SUM );
+
+         mpi::allReduceInplace( torque[0], mpi::SUM );
+         mpi::allReduceInplace( torque[1], mpi::SUM );
+         mpi::allReduceInplace( torque[2], mpi::SUM );
+      }
+
+      if( fileIO_ )
+         writeToFile( counter_, force, torque);
+
+      dragForce_ = force[0];
+      liftForce_ = force[2];
+
+      ++counter_;
+
+   }
+
+   real_t getDragForce()
+   {
+      return dragForce_;
+   }
+
+   real_t getLiftForce()
+   {
+      return liftForce_;
+   }
+
+   real_t getDragCoefficient()
+   {
+      return dragForce_ / dragNormalizationFactor_;
+   }
+
+   real_t getLiftCoefficient()
+   {
+      return liftForce_ / liftNormalizationFactor_;
+   }
+
+
+private:
+   void writeToFile( uint_t timestep, const Vector3<real_t> & force, const Vector3<real_t> & torque )
+   {
+      WALBERLA_ROOT_SECTION()
+      {
+         std::ofstream file;
+         file.open( fileName_.c_str(), std::ofstream::app );
+
+         file << real_c(timestep) / physicalTimeScale_
+              << "\t" << force[0] / dragNormalizationFactor_ << "\t" << force[2] / liftNormalizationFactor_
+              << "\t" << force[0] << "\t" << force[1] << "\t" << force[2]
+              << "\t" << torque[0] << "\t" << torque[1] << "\t" << torque[2]
+              << "\n";
+         file.close();
+      }
+   }
+
+   shared_ptr< StructuredBlockStorage > blocks_;
+   const BlockDataID bodyStorageID_;
+   std::string fileName_;
+   bool fileIO_;
+   real_t dragForce_, liftForce_;
+   real_t dragNormalizationFactor_, liftNormalizationFactor_;
+   real_t physicalTimeScale_;
+   uint_t counter_;
+
+};
+
+void initializeCouetteProfile( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & pdfFieldID, const BlockDataID & boundaryHandlingID,
+                               const real_t & substrateHeight, const real_t & domainHeight, const real_t wallVelocity )
+{
+   const real_t rho = real_c(1);
+
+   for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+   {
+      auto pdfField = blockIt->getData< PdfField_T > ( pdfFieldID );
+      BoundaryHandling_T * boundaryHandling = blockIt->getData< BoundaryHandling_T >( boundaryHandlingID );
+
+      WALBERLA_FOR_ALL_CELLS_XYZ(pdfField,
+                                 if( !boundaryHandling->isDomain(x,y,z) ) continue;
+
+                                 const Vector3< real_t > coord = blocks->getBlockLocalCellCenter( *blockIt, Cell(x,y,z) );
+
+                                 Vector3< real_t > velocity( real_c(0) );
+
+                                 if( coord[2] >= substrateHeight )
+                                 {
+                                    velocity[0] =  wallVelocity * (coord[2] - substrateHeight) / ( domainHeight - substrateHeight);
+                                 }
+                                 pdfField->setToEquilibrium( x, y, z, velocity, rho );
+      )
+   }
+}
+
+void logFinalResult( const std::string & fileName, real_t Re, real_t wallDistance, real_t diameter, uint_t numLevels,
+                     real_t dragCoeff, real_t liftCoeff, real_t timestep )
+{
+   WALBERLA_ROOT_SECTION()
+   {
+      std::ofstream file;
+      file.open( fileName.c_str(), std::ofstream::app );
+
+      file << Re << "\t " << wallDistance << "\t " << diameter << "\t " << numLevels << "\t "
+           << dragCoeff << "\t " << liftCoeff << "\t " << timestep
+           << "\n";
+      file.close();
+   }
+}
+
+//////////
+// MAIN //
+//////////
+
+//*******************************************************************************************************************
+/*!\brief Testcase that evaluates the drag and lift force on a sphere that is close to the bottom plane in shear flow
+ *
+ * see overview paper:
+ * Zeng, Najjar, Balachandar, Fischer - "Forces on a finite-sized particle located close to a wall in a linear shear flow", 2009
+ *
+ * contains references to:
+ * Leighton, Acrivos - "The lift on a small sphere touching a plane in the presence of a simple shear flow", 1985
+ * Zeng, Balachandar, Fischer - "Wall-induced forces on a rigid sphere at finite Reynolds number", 2005
+ *
+ * CFD-IBM simulations in:
+ * Lee, Balachandar - "Drag and lift forces on a spherical particle moving on a wall in a shear flow at finite Re", 2010
+ *
+ * Description:
+ *  - Domain size [x, y, z] = [48 x 16 x 8 ] * diameter = [L(ength), W(idth), H(eight)]
+ *  - horizontally periodic, bounded by two planes in z-direction
+ *  - top plane is moving with constant wall velocity -> shear flow
+ *  - sphere is placed in the vicinity of the bottom plane at [ L/2 + xOffset, W72 + yOffset, dist * diameter]
+ *  - distance of sphere center to the bottom plane is crucial parameter
+ *  - viscosity is adjusted to match specified Reynolds number = shearRate * diameter * wallDistance / viscosity
+ *  - dimensionless drag and lift forces are evaluated and written to logging file
+ *  - different variants of grid refinement can be chosen (number of levels, refinement region)
+ */
+//*******************************************************************************************************************
+
+int main( int argc, char **argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+   ///////////////////
+   // Customization //
+   ///////////////////
+
+   // simulation control
+   bool fileIO = false;
+   bool zeroShearTest = false; // shear rate is zero such that a quiescent fluid is obtained -> drag and lift have to be and remain zero -> ignores Reynolds number
+   uint_t vtkIOFreq = 0;
+   std::string baseFolderVTK = "vtk_out_ForcesNearPlane";
+   std::string baseFolderLogging = ".";
+
+   // physical setup
+   real_t diameter = real_t(20); // cells per diameter -> determines overall resolution
+   real_t normalizedWallDistance = real_t(1); // distance of the sphere center to the bottom wall, normalized by the diameter
+   real_t ReynoldsNumberShear = real_t(1); // = shearRate * wallDistance * diameter / viscosity
+
+   //numerical parameters
+   real_t minimumNonDimTimesteps = real_t(100); // minimum number of non-dimensional time steps before simulation can be terminated by convergence
+   uint_t numberOfLevels = uint_t(4); // number of grid levels for static refinement ( 1 = no refinement)
+   real_t xOffsetOfSpherePosition = real_t(0); // offset in x-direction of sphere position
+   real_t yOffsetOfSpherePosition = real_t(0); // offset in y-direction of sphere position
+   bool useSBB = false; // use simple bounce-back boundary condition for sphere
+   bool useLargeRefinementRegion = false; // uses the whole area near the bottom plane as the finest grid, else refinement is only applied around the sphere
+
+
+   // command line arguments
+   for( int i = 1; i < argc; ++i )
+   {
+      if( std::strcmp( argv[i], "--zeroShearTest" )    == 0 ) { zeroShearTest = true; continue; }
+      if( std::strcmp( argv[i], "--fileIO" )           == 0 ) { fileIO = true; continue; }
+      if( std::strcmp( argv[i], "--vtkIOFreq" )        == 0 ) { vtkIOFreq = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--diameter" )         == 0 ) { diameter = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--numLevels" )        == 0 ) { numberOfLevels = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--timesteps" )        == 0 ) { minimumNonDimTimesteps = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--wallDistance" )     == 0 ) { normalizedWallDistance = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--Re" )               == 0 ) { ReynoldsNumberShear = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--xOffset" )          == 0 ) { xOffsetOfSpherePosition = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--yOffset" )          == 0 ) { yOffsetOfSpherePosition = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--useSBB" )           == 0 ) { useSBB = true; continue; }
+      if( std::strcmp( argv[i], "--useLargeRefinementRegion" ) == 0 ) { useLargeRefinementRegion = true; continue; }
+      if( std::strcmp( argv[i], "--baseFolderVTK" ) == 0 ) { baseFolderVTK = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--baseFolderLogging" ) == 0 ) { baseFolderVTK = argv[++i]; continue; }
+      WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
+   }
+
+   WALBERLA_CHECK_GREATER_EQUAL(normalizedWallDistance, real_t(0.5));
+   WALBERLA_CHECK_GREATER_EQUAL(ReynoldsNumberShear, real_t(0));
+   WALBERLA_CHECK_GREATER_EQUAL(diameter, real_t(0));
+
+   //////////////////////////
+   // NUMERICAL PARAMETERS //
+   //////////////////////////
+
+   const real_t domainLength = real_t(48) * diameter; //x
+   const real_t domainWidth  = real_t(16) * diameter; //y
+   const real_t domainHeight = real_t( 8) * diameter; //z
+
+   Vector3<uint_t> domainSize( uint_c( std::ceil(domainLength)), uint_c( std::ceil(domainWidth)), uint_c( std::ceil(domainHeight)) );
+
+   real_t wallVelocity = real_t(0.01);
+   if( zeroShearTest ) wallVelocity = real_t(0);
+
+   const real_t wallDistance = diameter * normalizedWallDistance;
+   const real_t shearRate = wallVelocity / domainHeight;
+   const real_t velAtSpherePosition = shearRate * wallDistance;
+   const real_t viscosity = ( zeroShearTest ) ? real_t(0.015) : ( velAtSpherePosition * diameter / ReynoldsNumberShear );
+
+   const real_t relaxationTime = real_t(1) / lbm::collision_model::omegaFromViscosity(viscosity);
+
+   const real_t densityFluid = real_t(1);
+
+   const real_t dx = real_t(1);
+
+   const real_t physicalTimeScale = ( zeroShearTest ) ? real_t(10) : (diameter / velAtSpherePosition);
+   const uint_t minimumLBMtimesteps = uint_c(minimumNonDimTimesteps * physicalTimeScale);
+
+   const real_t omega = real_t(1) / relaxationTime;
+   const uint_t finestLevel = numberOfLevels - uint_t(1);
+   Vector3<real_t> initialPosition( domainLength * real_t(0.5) + xOffsetOfSpherePosition,
+                                    domainWidth * real_t(0.5) + yOffsetOfSpherePosition,
+                                    wallDistance );
+
+   WALBERLA_LOG_INFO_ON_ROOT("Setup:");
+   WALBERLA_LOG_INFO_ON_ROOT(" - domain size = " << domainSize );
+   WALBERLA_LOG_INFO_ON_ROOT(" - normalized wall distance = " << normalizedWallDistance );
+   WALBERLA_LOG_INFO_ON_ROOT(" - shear rate = " << shearRate );
+   WALBERLA_LOG_INFO_ON_ROOT(" - wall velocity = " << wallVelocity );
+   WALBERLA_LOG_INFO_ON_ROOT(" - Reynolds number (shear rate based) = " << ReynoldsNumberShear << ", vel at sphere pos = " << velAtSpherePosition);
+   WALBERLA_LOG_INFO_ON_ROOT(" - density = " << densityFluid  );
+   std::stringstream omega_msg;
+   for( uint_t i = 0; i < numberOfLevels; ++i )
+   {
+      omega_msg << lbm::collision_model::levelDependentRelaxationParameter( i, omega, finestLevel ) << " ( on level " << i << " ), ";
+   }
+   WALBERLA_LOG_INFO_ON_ROOT(" - viscosity = " << viscosity << " -> omega = " << omega_msg.str());
+   WALBERLA_LOG_INFO_ON_ROOT(" - sphere diameter = " << diameter << ", position = " << initialPosition << " ( xOffset = " << xOffsetOfSpherePosition << ", yOffset = " << yOffsetOfSpherePosition << " )");
+   WALBERLA_LOG_INFO_ON_ROOT(" - using " << ((useSBB) ? "SBB" : "CLI") << " boundary condition along sphere");
+   WALBERLA_LOG_INFO_ON_ROOT(" - base folder VTK = " << baseFolderVTK << ", base folder logging = " << baseFolderLogging );
+   if( zeroShearTest ) WALBERLA_LOG_INFO_ON_ROOT(" === ATTENTION: USING zeroShearTest SETUP -> MANY PHYSICAL PARAMETERS ARE IGNORED AND THUS MIGHT BE REPORTED INCORRECTLY! === ") ;
+
+   ///////////////////////////
+   // BLOCK STRUCTURE SETUP //
+   ///////////////////////////
+
+   const uint_t levelScalingFactor = ( uint_t(1) << finestLevel );
+   const uint_t lbmTimeStepsPerTimeLoopIteration = levelScalingFactor;
+
+   const uint_t timesteps = uint_c( minimumLBMtimesteps / lbmTimeStepsPerTimeLoopIteration );
+
+   Vector3<uint_t> coarseBlocksPerDirection( 6, 2, 1 );
+   if( numberOfLevels == 1 || numberOfLevels == 2 )
+   {
+      // have enough coarse blocks to run on cluster
+      coarseBlocksPerDirection = Vector3<uint_t>(12,4,2);
+   }
+   WALBERLA_CHECK( domainSize[0] % coarseBlocksPerDirection[0] == 0 &&
+                   domainSize[1] % coarseBlocksPerDirection[1] == 0 &&
+                   domainSize[2] % coarseBlocksPerDirection[2] == 0 );
+   Vector3<uint_t> blockSizeInCells(domainSize[0] / ( coarseBlocksPerDirection[0] * levelScalingFactor ),
+                                    domainSize[1] / ( coarseBlocksPerDirection[1] * levelScalingFactor ),
+                                    domainSize[2] / ( coarseBlocksPerDirection[2] * levelScalingFactor ) );
+
+   AABB simulationDomain( real_t(0), real_t(0), real_t(0), real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) );
+   auto blocks = createBlockStructure( simulationDomain, blockSizeInCells, numberOfLevels, diameter, initialPosition, useLargeRefinementRegion );
+
+   //write domain decomposition to file
+   if( vtkIOFreq > 0 )
+   {
+      vtk::writeDomainDecomposition( blocks, "initial_domain_decomposition", baseFolderVTK );
+   }
+
+
+   /////////////////
+   // PE COUPLING //
+   /////////////////
+
+   // set up pe functionality
+   shared_ptr<pe::BodyStorage>  globalBodyStorage = make_shared<pe::BodyStorage>();
+   pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
+
+   auto bodyStorageID  = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage");
+
+   // set up synchronization procedure
+   const real_t overlap = real_t( 1.5 ) * dx;
+   boost::function<void(void)> syncCall = boost::bind( pe::syncShadowOwners<BodyTypeTuple>, boost::ref(blocks->getBlockForest()), bodyStorageID, static_cast<WcTimingTree*>(NULL), overlap, false );
+
+   // create pe bodies
+
+   // bounding planes (global)
+   const auto planeMaterial = pe::createMaterial( "myPlaneMat", real_t(8920), real_t(0), real_t(1), real_t(1), real_t(0), real_t(1), real_t(1), real_t(0), real_t(0) );
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), planeMaterial );
+   auto topPlane = pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,domainHeight), planeMaterial );
+   topPlane->setLinearVel(wallVelocity, real_t(0), real_t(0));
+
+   // add the sphere
+   pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, initialPosition, real_t(0.5) * diameter, planeMaterial );
+
+   uint_t minBlockSizeInCells = blockSizeInCells.min();
+   for( uint_t i = 0; i < uint_c(diameter / real_c(minBlockSizeInCells)) + 1; ++i)
+      syncCall();
+
+   ///////////////////////
+   // ADD DATA TO BLOCKS //
+   ////////////////////////
+
+   // create the lattice model
+   LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega, lbm::collision_model::TRT::threeSixteenth, finestLevel ) );
+
+   // add PDF field
+   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel,
+                                                                         Vector3< real_t >( real_t(0) ), real_t(1),
+                                                                         FieldGhostLayers, field::zyxf );
+   // add flag field
+   BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field", FieldGhostLayers );
+
+   // add body field
+   BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", NULL, field::zyxf, FieldGhostLayers );
+
+   // add boundary handling
+   BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >( MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID ), "boundary handling" );
+
+
+   // map planes into the LBM simulation -> act as no-slip boundaries
+   // since we have refinement boundaries along the bottom plane, we use SBB here
+   // (see test/pe_coupling/momentum_exchange_method/GlobalBodyAsBoundaryMEMStaticRefinement test for more infos)
+   pe_coupling::mapMovingGlobalBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, *globalBodyStorage, bodyFieldID, MO_SBB_Flag );
+
+   // map movable sphere into the LBM simulation
+   // the whole sphere resides on the same refinement level, so we can also use CLI instead of SBB
+   if( useSBB )
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_SBB_Flag );
+   else
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
+
+
+   // initialize Couette velocity profile in whole domain
+   if( !zeroShearTest ) initializeCouetteProfile(blocks, pdfFieldID, boundaryHandlingID, diameter, domainHeight, wallVelocity);
+
+   ///////////////
+   // TIME LOOP //
+   ///////////////
+
+   // setup of the LBM communication for synchronizing the pdf field between neighboring blocks
+   boost::function< void () > commFunction;
+   blockforest::communication::UniformBufferedScheme< Stencil_T > scheme( blocks );
+   scheme.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldID ) );
+   commFunction = scheme;
+
+   // create the timeloop
+   SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
+
+   if( vtkIOFreq != uint_t(0) )
+   {
+      // flag field (written only once in the first time step, ghost layers are also written)
+      auto flagFieldVTK = vtk::createVTKOutput_BlockData( blocks, "flag_field", vtkIOFreq, uint_t(0), false, baseFolderVTK );
+      flagFieldVTK->addCellDataWriter( make_shared< field::VTKWriter< FlagField_T > >( flagFieldID, "FlagField" ) );
+      timeloop.addFuncBeforeTimeStep( vtk::writeFiles( flagFieldVTK ), "VTK (flag field data)" );
+
+      auto pdfFieldVTK = vtk::createVTKOutput_BlockData( blocks, "fluid_field", vtkIOFreq, uint_t(0), false, baseFolderVTK );
+
+      field::FlagFieldCellFilter< FlagField_T > fluidFilter( flagFieldID );
+      fluidFilter.addFlag( Fluid_Flag );
+      pdfFieldVTK->addCellInclusionFilter( fluidFilter );
+
+      pdfFieldVTK->addCellDataWriter( make_shared< lbm::VelocityVTKWriter< LatticeModel_T, float > >( pdfFieldID, "VelocityFromPDF" ) );
+      pdfFieldVTK->addCellDataWriter( make_shared< lbm::DensityVTKWriter < LatticeModel_T, float > >( pdfFieldID, "DensityFromPDF" ) );
+
+      timeloop.addFuncBeforeTimeStep( vtk::writeFiles( pdfFieldVTK ), "VTK (fluid field data)" );
+   }
+
+   auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag );
+
+   auto refinementTimestep = lbm::refinement::makeTimeStep< LatticeModel_T, BoundaryHandling_T >( blocks, sweep, pdfFieldID, boundaryHandlingID );
+
+   // add force evaluation and logging
+   real_t normalizationFactor = ( zeroShearTest ) ? real_t(1) : ( math::M_PI / real_t(8) * densityFluid * shearRate * shearRate * wallDistance * wallDistance * diameter * diameter );
+   std::string loggingFileName( baseFolderLogging + "/LoggingForcesNearPlane");
+   loggingFileName += "_lvl" + std::to_string(numberOfLevels);
+   loggingFileName += "_D" + std::to_string(uint_c(diameter));
+   loggingFileName += "_Re" + std::to_string(uint_c(ReynoldsNumberShear));
+   loggingFileName += "_WD" + std::to_string(uint_c(normalizedWallDistance * real_t(1000)));
+   loggingFileName += ".txt";
+   shared_ptr< SpherePropertyLogger > logger = walberla::make_shared< SpherePropertyLogger >( blocks, bodyStorageID,
+                                                                                              loggingFileName, fileIO,
+                                                                                              normalizationFactor, normalizationFactor, physicalTimeScale);
+   refinementTimestep->addPostStreamVoidFunction( lbm::refinement::FunctorWrapper( SharedFunctor< SpherePropertyLogger >( logger ) ), "Sphere property logger", finestLevel );
+
+   // add pe timesteps
+   refinementTimestep->addPostStreamVoidFunction( lbm::refinement::FunctorWrapper(pe_coupling::ForceTorqueOnBodiesResetter( blocks, bodyStorageID )),
+                                                  "force reset", finestLevel );
+
+   // add LBM sweep with refinement
+   timeloop.addFuncBeforeTimeStep( makeSharedFunctor( refinementTimestep ), "LBM refinement time step" );
+
+
+   timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" );
+
+   // compute reference values from literature
+
+   const real_t normalizedGapSize = normalizedWallDistance - real_t(0.5);
+
+   // drag correlation for the drag coefficient
+   const real_t standardDragCorrelation = real_t(24) / ReynoldsNumberShear * (real_t(1) + real_t(0.15) * std::pow(ReynoldsNumberShear, real_t(0.687))); // Schiller-Naumann correlation
+   const real_t dragCorrelationWithGapSizeStokes = real_t(24) / ReynoldsNumberShear * (real_t(1) + real_t(0.138) * std::exp(real_t(-2) * normalizedGapSize) + real_t(9)/( real_t(16) * (real_t(1) + real_t(2) * normalizedGapSize) ) ); // Goldman et al. (1967)
+   const real_t alphaDragS = real_t(0.15) - real_t(0.046) * ( real_t(1) - real_t(0.16) * normalizedGapSize * normalizedGapSize ) * std::exp( -real_t(0.7) *  normalizedGapSize);
+   const real_t betaDragS = real_t(0.687) + real_t(0.066)*(real_t(1) - real_t(0.76) * normalizedGapSize * normalizedGapSize) * std::exp( - std::pow( normalizedGapSize, real_t(0.9) ) );
+   const real_t dragCorrelationZeng = dragCorrelationWithGapSizeStokes * ( real_t(1) + alphaDragS * std::pow( ReynoldsNumberShear, betaDragS ) ); // Zeng et al. (2009) - Eqs. (13) and (14)
+
+   // lift correlations for the lift coefficient
+   const real_t liftCorrelationZeroGapStokes = real_t(5.87); // Leighton, Acrivos (1985)
+   const real_t liftCorrelationZeroGap = real_t(3.663) / std::pow( ReynoldsNumberShear * ReynoldsNumberShear + real_t(0.1173), real_t(0.22) ); //  Zeng et al. (2009) - Eq. (19)
+   const real_t alphaLiftS = - std::exp( -real_t(0.3) + real_t(0.025) * ReynoldsNumberShear);
+   const real_t betaLiftS = real_t(0.8) + real_t(0.01) * ReynoldsNumberShear;
+   const real_t lambdaLiftS = ( real_t(1) - std::exp(-normalizedGapSize)) * std::pow( ReynoldsNumberShear / real_t(250), real_t(5) / real_t(2) );
+   const real_t liftCorrelationZeng = liftCorrelationZeroGap * std::exp( - real_t(0.5) * normalizedGapSize * std::pow( ReynoldsNumberShear / real_t(250), real_t(4)/real_t(3))) *
+                                      ( std::exp( alphaLiftS * std::pow( normalizedGapSize, betaLiftS ) ) - lambdaLiftS ); // Zeng et al. (2009) - Eqs. (28) and (29)
+
+   ////////////////////////
+   // EXECUTE SIMULATION //
+   ////////////////////////
+
+   WcTimingPool timeloopTiming;
+
+   const real_t relativeChangeConvergenceEps = real_t(1e-3);
+   const real_t physicalCheckingFrequency = real_t(0.00625);
+   const uint_t checkingFrequency = (zeroShearTest) ? uint_t(1) : uint_c( physicalCheckingFrequency * physicalTimeScale );
+
+   WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with at least " << timesteps << " (coarse) time steps");
+   WALBERLA_LOG_INFO_ON_ROOT("Convergence checking frequency = " << checkingFrequency << " (physically = " << physicalCheckingFrequency << ")");
+
+   real_t maxDragCurrentCheckingPeriod = -math::Limits<real_t>::inf();
+   real_t minDragCurrentCheckingPeriod = math::Limits<real_t>::inf();
+   real_t maxLiftCurrentCheckingPeriod = -math::Limits<real_t>::inf();
+   real_t minLiftCurrentCheckingPeriod = math::Limits<real_t>::inf();
+   uint_t timestep = 0;
+
+   // time loop
+   while( true )
+   {
+      // perform a single simulation step
+      timeloop.singleStep( timeloopTiming );
+
+      real_t curDrag = logger->getDragCoefficient();
+      real_t curLift = logger->getLiftCoefficient();
+
+      maxDragCurrentCheckingPeriod = std::max( maxDragCurrentCheckingPeriod, curDrag);
+      minDragCurrentCheckingPeriod = std::min( minDragCurrentCheckingPeriod, curDrag);
+      maxLiftCurrentCheckingPeriod = std::max( maxLiftCurrentCheckingPeriod, curLift);
+      minLiftCurrentCheckingPeriod = std::min( minLiftCurrentCheckingPeriod, curLift);
+
+      real_t dragDiffCurrentCheckingPeriod = std::fabs(maxDragCurrentCheckingPeriod - minDragCurrentCheckingPeriod)/std::fabs(maxDragCurrentCheckingPeriod);
+      real_t liftDiffCurrentCheckingPeriod = std::fabs(maxLiftCurrentCheckingPeriod - minLiftCurrentCheckingPeriod)/std::fabs(maxLiftCurrentCheckingPeriod);
+
+      // continuous output during simulation
+      if( timestep % (checkingFrequency * uint_t(10)) == 0)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("Drag: current C_D = " << curDrag );
+         if( !zeroShearTest )
+         {
+            WALBERLA_LOG_INFO_ON_ROOT(" - standard C_D = " <<  standardDragCorrelation );
+            WALBERLA_LOG_INFO_ON_ROOT(" - C_D ( Stokes fit ) = " << dragCorrelationWithGapSizeStokes );
+            WALBERLA_LOG_INFO_ON_ROOT(" - C_D ( Zeng ) = " << dragCorrelationZeng );
+         }
+
+         WALBERLA_LOG_INFO_ON_ROOT("Lift: current C_L = " << curLift );
+         if( !zeroShearTest )
+         {
+            WALBERLA_LOG_INFO_ON_ROOT(" - C_L ( Stokes, zero gap ) = " << liftCorrelationZeroGapStokes);
+            WALBERLA_LOG_INFO_ON_ROOT(" - C_L ( zero gap ) = " << liftCorrelationZeroGap);
+            WALBERLA_LOG_INFO_ON_ROOT(" - C_L ( Zeng ) = " << liftCorrelationZeng);
+            WALBERLA_LOG_INFO_ON_ROOT( "Drag difference [(max-min)/max] = " << dragDiffCurrentCheckingPeriod << ", lift difference = " << liftDiffCurrentCheckingPeriod);
+         }
+      }
+
+      // check for convergence ( = difference between min and max values in current checking period is below limit)
+      if( timestep % checkingFrequency == 0 )
+      {
+         if( timestep > timesteps &&
+             dragDiffCurrentCheckingPeriod < relativeChangeConvergenceEps &&
+             liftDiffCurrentCheckingPeriod < relativeChangeConvergenceEps )
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Forces converged with an eps of " << relativeChangeConvergenceEps );
+            WALBERLA_LOG_INFO_ON_ROOT(" - drag min = " << minDragCurrentCheckingPeriod << " , max = " << maxDragCurrentCheckingPeriod);
+            WALBERLA_LOG_INFO_ON_ROOT(" - lift min = " << minLiftCurrentCheckingPeriod << " , max = " << maxLiftCurrentCheckingPeriod);
+            break;
+         }
+
+         //reset min and max values for new checking period
+         maxDragCurrentCheckingPeriod = -math::Limits<real_t>::inf();
+         minDragCurrentCheckingPeriod = math::Limits<real_t>::inf();
+         maxLiftCurrentCheckingPeriod = -math::Limits<real_t>::inf();
+         minLiftCurrentCheckingPeriod = math::Limits<real_t>::inf();
+      }
+
+      if( zeroShearTest && timestep > timesteps ) break;
+
+      ++timestep;
+
+   }
+
+   timeloopTiming.logResultOnRoot();
+
+   if ( !zeroShearTest )
+   {
+      std::string resultFileName( baseFolderLogging + "/ResultForcesNearPlane.txt");
+      logFinalResult(resultFileName, ReynoldsNumberShear, normalizedWallDistance, diameter, numberOfLevels,
+                     logger->getLiftCoefficient(), logger->getLiftCoefficient(), real_c(timestep * lbmTimeStepsPerTimeLoopIteration) / physicalTimeScale );
+   }
+
+   return EXIT_SUCCESS;
+}
+
+} // namespace forces_on_sphere_near_plane_in_shear_flow
+
+int main( int argc, char **argv ){
+   forces_on_sphere_near_plane_in_shear_flow::main(argc, argv);
+}
diff --git a/apps/benchmarks/MotionSingleHeavySphere/MotionSingleHeavySphere.cpp b/apps/benchmarks/MotionSingleHeavySphere/MotionSingleHeavySphere.cpp
index b037a5cbf43b6431ca24d74a2b0bf6a8f8192a33..0774932f3d17ef6d9af879d2f127159a15eddbd0 100644
--- a/apps/benchmarks/MotionSingleHeavySphere/MotionSingleHeavySphere.cpp
+++ b/apps/benchmarks/MotionSingleHeavySphere/MotionSingleHeavySphere.cpp
@@ -900,9 +900,9 @@ int main( int argc, char **argv )
 
       //initialization of the PDFs inside the particles, important for PSM M3
       if( psmVariant == PSMVariant::SC1W1 || psmVariant == PSMVariant::SC2W1 || psmVariant == PSMVariant::SC3W1 )
-         pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+         pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
       else
-         pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+         pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
 
    }
    else
@@ -912,11 +912,11 @@ int main( int argc, char **argv )
 
       if( memVariant == MEMVariant::CLI )
       {
-         pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MEM_CLI_Flag );
+         pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MEM_CLI_Flag );
       }else if ( memVariant == MEMVariant::MR ){
-         pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MEM_MR_Flag );
+         pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MEM_MR_Flag );
       }else{
-         pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MEM_BB_Flag );
+         pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MEM_BB_Flag );
       }
    }
 
diff --git a/src/lbm/geometry/IntersectionRatio.impl.h b/src/lbm/geometry/IntersectionRatio.impl.h
index 95d7a8cbe241e96e70e4aa7c635462ea469f379e..fd19725e16de01230fb7148e8f1e51771e3b5943 100644
--- a/src/lbm/geometry/IntersectionRatio.impl.h
+++ b/src/lbm/geometry/IntersectionRatio.impl.h
@@ -30,8 +30,8 @@ real_t intersectionRatioBisection( const Body & body,
                                    const Vector3<real_t> & direction,
                                    const real_t epsilon )
 {
-   WALBERLA_ASSERT( !geometry::contains( body, fluidPoint ) );
-   WALBERLA_ASSERT(  geometry::contains( body, fluidPoint + direction ) );
+   WALBERLA_ASSERT( !geometry::contains( body, fluidPoint ), "fluid point: " << fluidPoint );
+   WALBERLA_ASSERT(  geometry::contains( body, fluidPoint + direction ), "fluidPoint + direction: " << fluidPoint + direction );
    
    const real_t sqEpsilon         = epsilon * epsilon;
    const real_t sqDirectionLength = direction.sqrLength();
diff --git a/src/lbm/refinement/PdfFieldPackInfo.h b/src/lbm/refinement/PdfFieldPackInfo.h
index e993a449d67d137219c6a2b2ae090dd6ba808444..6fbfb92216ba567974b4dbd717be27dc3e3a403d 100644
--- a/src/lbm/refinement/PdfFieldPackInfo.h
+++ b/src/lbm/refinement/PdfFieldPackInfo.h
@@ -658,7 +658,7 @@ void PdfFieldPackInfo< LatticeModel_T, BoundaryHandling_T >::communicateLocalCoa
 
 #ifndef NDEBUG
                if( boundaryHandling->isDomain(sx,sy,sz) )
-                  WALBERLA_ASSERT( !math::isnan( sf->get( sx, sy, sz, idx ) ) );
+                  WALBERLA_ASSERT( !math::isnan( sf->get( sx, sy, sz, idx ) ), sx << ", " << sy << ", " << sz << ", " << idx << " coarse sender block = " << coarseSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
 #endif
                }
                rx += cell_idx_t(2);
@@ -697,7 +697,7 @@ void PdfFieldPackInfo< LatticeModel_T, BoundaryHandling_T >::communicateLocalCoa
 
 #ifndef NDEBUG
                if( boundaryHandling->isDomain(sx,sy,sz) )
-                  WALBERLA_ASSERT( !math::isnan( value ) );
+                  WALBERLA_ASSERT( !math::isnan( value ), "value at " << sx << ", " << sy << ", " << sz << ", " << idx << " coarse sender block = " << coarseSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
 #endif
 
                   rf->get( rx,                 ry,                 rz,                 idx ) = value;
@@ -779,16 +779,16 @@ void PdfFieldPackInfo< LatticeModel_T, BoundaryHandling_T >::packDataFineToCoars
 #ifndef NDEBUG
                if( boundaryHandling->isDomain(x,y,z) )
                {
-                  WALBERLA_ASSERT( !math::isnan( field->get( x,                 y,                 z,                 idx ) ) );
-                  WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y,                 z,                 idx ) ) );
-                  WALBERLA_ASSERT( !math::isnan( field->get( x,                 y + cell_idx_t(1), z,                 idx ) ) );
-                  WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y + cell_idx_t(1), z,                 idx ) ) );
+                  WALBERLA_ASSERT( !math::isnan( field->get( x,                 y,                 z,                 idx ) ), x << ", " << y << ", " << z << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                  WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y,                 z,                 idx ) ), x + cell_idx_t(1) << ", " << y << ", " << z << ", " << idx <<" fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
+                  WALBERLA_ASSERT( !math::isnan( field->get( x,                 y + cell_idx_t(1), z,                 idx ) ), x << ", " << y + cell_idx_t(1) << ", " << z << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
+                  WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y + cell_idx_t(1), z,                 idx ) ), x + cell_idx_t(1) << ", " << y + cell_idx_t(1) << ", " << z << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
                   if( Stencil::D == uint_t(3) )
                   {
-                     WALBERLA_ASSERT( !math::isnan( field->get( x,                 y,                 z + cell_idx_t(1), idx ) ) );
-                     WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y,                 z + cell_idx_t(1), idx ) ) );
-                     WALBERLA_ASSERT( !math::isnan( field->get( x,                 y + cell_idx_t(1), z + cell_idx_t(1), idx ) ) );
-                     WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y + cell_idx_t(1), z + cell_idx_t(1), idx ) ) );
+                     WALBERLA_ASSERT( !math::isnan( field->get( x,                 y,                 z + cell_idx_t(1), idx ) ), x << ", " << y << ", " << z + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> "  );
+                     WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y,                 z + cell_idx_t(1), idx ) ), x + cell_idx_t(1) << ", " << y << ", " << z + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
+                     WALBERLA_ASSERT( !math::isnan( field->get( x,                 y + cell_idx_t(1), z + cell_idx_t(1), idx ) ), x << ", " << y + cell_idx_t(1) << ", " << z + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
+                     WALBERLA_ASSERT( !math::isnan( field->get( x + cell_idx_t(1), y + cell_idx_t(1), z + cell_idx_t(1), idx ) ), x + cell_idx_t(1) << ", " << y + cell_idx_t(1) << ", " << z + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> " );
                   }
                }
 #endif
@@ -925,16 +925,16 @@ void PdfFieldPackInfo< LatticeModel_T, BoundaryHandling_T >::communicateLocalFin
 #ifndef NDEBUG
                if( boundaryHandling->isDomain(sx,sy,sz) )
                {
-                  WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy,                 sz,                 idx ) ) );
-                  WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy,                 sz,                 idx ) ) );
-                  WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy + cell_idx_t(1), sz,                 idx ) ) );
-                  WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy + cell_idx_t(1), sz,                 idx ) ) );
+                  WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy,                 sz,                 idx ) ), sx << ", " << sy << ", " << sz << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                  WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy,                 sz,                 idx ) ), sx + cell_idx_t(1) << ", " << sy << ", " << sz << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                  WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy + cell_idx_t(1), sz,                 idx ) ), sx << ", " << sy + cell_idx_t(1) << ", " << sz << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                  WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy + cell_idx_t(1), sz,                 idx ) ), sx + cell_idx_t(1) << ", " << sy + cell_idx_t(1) << ", " << sz << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
                   if( Stencil::D == uint_t(3) )
                   {
-                     WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy,                 sz + cell_idx_t(1), idx ) ) );
-                     WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy,                 sz + cell_idx_t(1), idx ) ) );
-                     WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy + cell_idx_t(1), sz + cell_idx_t(1), idx ) ) );
-                     WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy + cell_idx_t(1), sz + cell_idx_t(1), idx ) ) );
+                     WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy,                 sz + cell_idx_t(1), idx ) ), sx << ", " << sy << ", " << sz + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                     WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy,                 sz + cell_idx_t(1), idx ) ), sx + cell_idx_t(1) << ", " << sy << ", " << sz + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                     WALBERLA_ASSERT( !math::isnan( sf->get( sx,                 sy + cell_idx_t(1), sz + cell_idx_t(1), idx ) ), sx << ", " << sy + cell_idx_t(1) << ", " << sz + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
+                     WALBERLA_ASSERT( !math::isnan( sf->get( sx + cell_idx_t(1), sy + cell_idx_t(1), sz + cell_idx_t(1), idx ) ), sx + cell_idx_t(1) << ", " << sy + cell_idx_t(1) << ", " << sz + cell_idx_t(1) << ", " << idx << " fine sender block = " << fineSender->getId() << " in dir <" << stencil::cx[dir] << "," << stencil::cy[dir] << "," << stencil::cz[dir] << "> ");
                   }
                }
 #endif
diff --git a/src/lbm/refinement/RefinementFunctorWrapper.h b/src/lbm/refinement/RefinementFunctorWrapper.h
new file mode 100644
index 0000000000000000000000000000000000000000..a44dfd301f3a3f770868a3bebdfa0cb2dc446af1
--- /dev/null
+++ b/src/lbm/refinement/RefinementFunctorWrapper.h
@@ -0,0 +1,66 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file RefinementFunctorWrapper.h
+//! \ingroup lbm
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "domain_decomposition/BlockStorage.h"
+
+#include <boost/function.hpp>
+
+namespace walberla {
+namespace lbm {
+namespace refinement {
+
+class FunctorWrapper {
+public:
+   FunctorWrapper(boost::function<void()> fct)
+         : fct_(fct) {
+   }
+
+   void operator()(const uint_t /*level*/, const uint_t /*executionCounter*/) {
+      fct_();
+   }
+
+private:
+   boost::function<void(void)> fct_;
+};
+
+class SweepAsFunctorWrapper {
+public:
+   SweepAsFunctorWrapper( boost::function<void(IBlock * )> fct, const shared_ptr <StructuredBlockStorage> &blockStorage )
+         : fct_(fct), blockStorage_(blockStorage) {
+   }
+
+   void operator()(const uint_t level, const uint_t /*executionCounter*/) {
+      for (auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt) {
+         if (blockStorage_->getLevel(*blockIt) != level) continue;
+         fct_(blockIt.get());
+      }
+   }
+
+private:
+   boost::function<void(IBlock * )> fct_;
+   shared_ptr <StructuredBlockStorage> blockStorage_;
+};
+
+} // namespace refinement
+} // namespace lbm
+} // namespace walberla
diff --git a/src/lbm/refinement/TimeStep.h b/src/lbm/refinement/TimeStep.h
index 53f8b355abe9cc594bbce121f8c2285a58eb0210..f006d0494aa0af888821c671c6491aa86cff49dc 100644
--- a/src/lbm/refinement/TimeStep.h
+++ b/src/lbm/refinement/TimeStep.h
@@ -1591,91 +1591,130 @@ void TimeStep< LatticeModel_T, Sweep_T, BoundaryHandling_T >::recursiveStep( con
 
    std::vector< Block * > blocks = selectedBlocks( level );
 
+   WALBERLA_LOG_DETAIL("Starting recursive step with level " << level << " and execution count " << executionCount);
+
    if( asynchronousCommunication_ && level != coarsestLevel )
    {
+      WALBERLA_LOG_DETAIL("Start communication coarse to fine, initiated by fine level " << level );
       startCommunicationCoarseToFine( level ); // [start] explosion (initiated by fine level, involves "level" and "level-1")
    }
 
+   WALBERLA_LOG_DETAIL("Colliding on level " << level);
    collide( blocks, level, executionCount1st );
 
    if( asynchronousCommunication_ )
    {
+      WALBERLA_LOG_DETAIL("Start communication equal level, initiated by level " << level );
       startCommunicationEqualLevel( level ); // [start] equal level communication
    }
 
    if( level != finestLevel )
    {
+      WALBERLA_LOG_DETAIL("Calling recursive step with level " << level + uint_t(1) << " and execution count " << executionCount1st );
       recursiveStep( level + uint_t(1), executionCount1st );
 
-      if( asynchronousCommunication_ )
-         startCommunicationFineToCoarse( level + uint_t(1) ); // [start] coalescence (initiated by coarse level)
+      if( asynchronousCommunication_ ) {
+         WALBERLA_LOG_DETAIL("Start communication fine to coarse, initiated by coarse level " << level );
+         startCommunicationFineToCoarse(level + uint_t(1)); // [start] coalescence (initiated by coarse level)
+      }
    }
 
    if( level != coarsestLevel )
    {
-      if( !asynchronousCommunication_ )
-         startCommunicationCoarseToFine( level ); // [start] explosion (initiated by fine level, involves "level" and "level-1")
+      if( !asynchronousCommunication_ ) {
+         WALBERLA_LOG_DETAIL("Start communication coarse to fine, initiated by fine level " << level );
+         startCommunicationCoarseToFine(level); // [start] explosion (initiated by fine level, involves "level" and "level-1")
+      }
+      WALBERLA_LOG_DETAIL("End communication coarse to fine, initiated by fine level " << level );
       endCommunicationCoarseToFine( level ); // [end] explosion (initiated by fine level, involves "level" and "level-1")
+      WALBERLA_LOG_DETAIL("Perform linear explosion on level " << level );
       performLinearExplosion( blocks, level );
    }
 
-   if( !asynchronousCommunication_ )
-      startCommunicationEqualLevel( level ); // [start] equal level communication
+   if( !asynchronousCommunication_ ) {
+      WALBERLA_LOG_DETAIL("Start communication equal level, initiated by level " << level );
+      startCommunicationEqualLevel(level); // [start] equal level communication
+   }
+   WALBERLA_LOG_DETAIL("End communication equal level, initiated by level " << level );
    endCommunicationEqualLevel( level ); // [end] equal level communication
 
    // performLinearExplosion( blocks, level ); // if equal level neighbor values are needed, linear explosion should be performed here
 
    if( level == finestLevel && level != coarsestLevel )
    {
+      WALBERLA_LOG_DETAIL("Stream + collide on level " << level );
       streamCollide( blocks, level, executionCount1st );
    }
    else
    {
+      WALBERLA_LOG_DETAIL("Stream on level " << level );
       stream( blocks, level, executionCount1st );
 
       if( level != finestLevel )
       {
-         if( !asynchronousCommunication_ )
-            startCommunicationFineToCoarse( level + uint_t(1) ); // [start] coalescence (initiated by coarse level)
+         if( !asynchronousCommunication_ ) {
+            WALBERLA_LOG_DETAIL("Start communication fine to coarse, initiated by coarse level " << level );
+            startCommunicationFineToCoarse(level + uint_t(1)); // [start] coalescence (initiated by coarse level)
+         }
+         WALBERLA_LOG_DETAIL("End communication fine to coarse, initiated by coarse level " << level );
          endCommunicationFineToCoarse( level + uint_t(1) ); // [end] coalescence (initiated by coarse level)
       }
-      
+
+      WALBERLA_LOG_DETAIL("Finish stream on level " << level );
       finishStream( blocks, level, executionCount1st );
 
       if( level == coarsestLevel )
+      {
+         WALBERLA_LOG_DETAIL("End recursive step on level " << level );
          return;
+      }
 
+      WALBERLA_LOG_DETAIL("Colliding on level " << level);
       collide( blocks, level, executionCount2nd );
    }
 
-   if( asynchronousCommunication_ )
-      startCommunicationEqualLevel( level ); // [start] equal level communication
+   if( asynchronousCommunication_ ) {
+      WALBERLA_LOG_DETAIL("Start communication equal level, initiated by level " << level );
+      startCommunicationEqualLevel(level); // [start] equal level communication
+   }
 
    if( level != finestLevel )
    {
+      WALBERLA_LOG_DETAIL("Calling recursive step with level " << level + uint_t(1) << " and execution count " << executionCount2nd );
       recursiveStep( level + uint_t(1), executionCount2nd );
    
-      if( asynchronousCommunication_ )
-         startCommunicationFineToCoarse( level + uint_t(1) ); // [start] coalescence (initiated by coarse level)
+      if( asynchronousCommunication_ ) {
+         WALBERLA_LOG_DETAIL("Start communication fine to coarse, initiated by coarse level " << level );
+         startCommunicationFineToCoarse(level + uint_t(1)); // [start] coalescence (initiated by coarse level)
+      }
    }
 
-   if( !asynchronousCommunication_ )
-      startCommunicationEqualLevel( level ); // [start] equal level communication
+   if( !asynchronousCommunication_ ) {
+      WALBERLA_LOG_DETAIL("Start communication equal level, initiated by level " << level );
+      startCommunicationEqualLevel(level); // [start] equal level communication
+   }
+   WALBERLA_LOG_DETAIL("End communication equal level, initiated by level " << level );
    endCommunicationEqualLevel( level ); // [end] equal level communication
 
+   WALBERLA_LOG_DETAIL("Stream on level " << level );
    stream( blocks, level, executionCount2nd );
 
    if( level != finestLevel )
    {
       if( !asynchronousCommunication_ )
+      {
+         WALBERLA_LOG_DETAIL("Start communication fine to coarse, initiated by coarse level " << level );
          startCommunicationFineToCoarse( level + uint_t(1) ); // [start] coalescence (initiated by coarse level)
+      }
+      WALBERLA_LOG_DETAIL("End communication fine to coarse, initiated by coarse level " << level );
       endCommunicationFineToCoarse( level + uint_t(1) ); // [end] coalescence (initiated by coarse level)
    }
-   
-   finishStream( blocks, level, executionCount2nd );
-}
 
+   WALBERLA_LOG_DETAIL("Finish stream on level " << level );
+   finishStream( blocks, level, executionCount2nd );
 
+   WALBERLA_LOG_DETAIL("End recursive step on level " << level );
+}
 
 
 
diff --git a/src/lbm/refinement/all.h b/src/lbm/refinement/all.h
index 02846718f6b2ea122c7a301f214188549acc6458..a3a353488c9529dab22749a5735954a4551b0d1b 100644
--- a/src/lbm/refinement/all.h
+++ b/src/lbm/refinement/all.h
@@ -25,6 +25,7 @@
 #include "BoundarySetup.h"
 #include "PdfFieldPackInfo.h"
 #include "PdfFieldSyncPackInfo.h"
+#include "RefinementFunctorWrapper.h"
 #include "TimeStep.h"
 #include "TimeStepPdfPackInfo.h"
 #include "TimeTracker.h"
diff --git a/src/pe_coupling/mapping/BodyBBMapping.cpp b/src/pe_coupling/mapping/BodyBBMapping.cpp
index 7cec1f1fd591d25f6c02762d85039711267a8a9c..c3726a574fd68c97b6dc5329311f6bb927e0fb77 100644
--- a/src/pe_coupling/mapping/BodyBBMapping.cpp
+++ b/src/pe_coupling/mapping/BodyBBMapping.cpp
@@ -28,7 +28,7 @@
 namespace walberla {
 namespace pe_coupling {
 
-CellInterval getCellBB( const pe::ConstBodyID body, const IBlock & block, const shared_ptr<StructuredBlockStorage> & blockStorage,
+CellInterval getCellBB( const pe::ConstBodyID body, const IBlock & block, StructuredBlockStorage & blockStorage,
                         const uint_t numberOfGhostLayersToInclude )
 {
 
@@ -38,16 +38,33 @@ CellInterval getCellBB( const pe::ConstBodyID body, const IBlock & block, const
 
    if( body->isFinite() )
    {
-      blockStorage->getCellBBFromAABB( cellBB, body->getAABB(), blockStorage->getLevel(block) );
-   } else
+      blockStorage.getCellBBFromAABB( cellBB, body->getAABB(), blockStorage.getLevel(block) );
+   }
+   else
    {
-      blockStorage->getCellBBFromAABB( cellBB, body->getAABB().getIntersection( blockStorage->getDomain() ), blockStorage->getLevel(block) );
+      // if body is infinite (global), its AABB is also infinite which then requires special treatment
+
+      auto level = blockStorage.getLevel(block);
+      const real_t dx = blockStorage.dx(level);
+      const real_t dy = blockStorage.dy(level);
+      const real_t dz = blockStorage.dz(level);
+      Vector3<real_t> aabbExtensionByGhostLayers(real_c(numberOfGhostLayersToInclude) * dx,
+                                                 real_c(numberOfGhostLayersToInclude) * dy,
+                                                 real_c(numberOfGhostLayersToInclude) * dz);
+      auto extendedBlockAABB = blockStorage.getAABB(block.getId()).getExtended( aabbExtensionByGhostLayers );
+
+      // intersect the infinte (global) body with the block AABB, extended by its ghost layers
+      // then determine the cell bounding box of the intersection
+      blockStorage.getCellBBFromAABB( cellBB, body->getAABB().getIntersection( extendedBlockAABB ), level );
+
+      // if infinte body does not intersect with the extended block AABB, return an empty interval
+      if( cellBB.empty() ) return CellInterval();
    }
 
    cellBB.xMin() -= cell_idx_t(1); cellBB.yMin() -= cell_idx_t(1); cellBB.zMin() -= cell_idx_t(1);
    cellBB.xMax() += cell_idx_t(1); cellBB.yMax() += cell_idx_t(1); cellBB.zMax() += cell_idx_t(1);
 
-   CellInterval blockBB = blockStorage->getBlockCellBB( block );
+   CellInterval blockBB = blockStorage.getBlockCellBB( block );
 
    cell_idx_t layers = cell_idx_c( numberOfGhostLayersToInclude );
 
@@ -56,12 +73,10 @@ CellInterval getCellBB( const pe::ConstBodyID body, const IBlock & block, const
 
    cellBB.intersect( blockBB );
 
-   blockStorage->transformGlobalToBlockLocalCellInterval( cellBB, block );
+   blockStorage.transformGlobalToBlockLocalCellInterval( cellBB, block );
 
    return cellBB;
 }
 
-
-
 } // namespace pe_coupling
 } // namespace walberla
diff --git a/src/pe_coupling/mapping/BodyBBMapping.h b/src/pe_coupling/mapping/BodyBBMapping.h
index a015ac972dd98bb75ca92f25fed19a09ea538710..59b58680f39ac24cebc373684f35c0a668800424 100644
--- a/src/pe_coupling/mapping/BodyBBMapping.h
+++ b/src/pe_coupling/mapping/BodyBBMapping.h
@@ -32,7 +32,7 @@ namespace walberla {
 namespace pe_coupling {
 
 
-CellInterval getCellBB( const pe::ConstBodyID body, const IBlock & block, const shared_ptr<StructuredBlockStorage> & blockStorage,
+CellInterval getCellBB( const pe::ConstBodyID body, const IBlock & block, StructuredBlockStorage & blockStorage,
                         const uint_t numberOfGhostLayersToInclude );
 
 
diff --git a/src/pe_coupling/mapping/BodyMapping.h b/src/pe_coupling/mapping/BodyMapping.h
index 6df7a1341381f868b6c6d46e6134fa48f7fc1f81..2e70c14398937f9ccb4077daa43b0826fbc29b52 100644
--- a/src/pe_coupling/mapping/BodyMapping.h
+++ b/src/pe_coupling/mapping/BodyMapping.h
@@ -37,24 +37,16 @@
 namespace walberla {
 namespace pe_coupling {
 
-
-
+// general mapping functions for a given single body on a given single block
 template< typename BoundaryHandling_T >
-void mapBody( const pe::BodyID & body, IBlock & block, const shared_ptr<StructuredBlockStorage> & blockStorage,
-              const BlockDataID & boundaryHandlingID, const FlagUID & obstacle,
-              const bool fixedBodiesOnly = true, const bool moBodiesOnly = true )
+void mapBody( const pe::BodyID & body, IBlock & block, StructuredBlockStorage & blockStorage,
+              BoundaryHandling_T * boundaryHandlingPtr, const FlagUID & obstacle )
 {
-   WALBERLA_ASSERT_EQUAL( &block.getBlockStorage(), &(blockStorage->getBlockStorage()) );
-
-   WALBERLA_UNUSED(moBodiesOnly); // undo when other coupling algorithms are available
+   WALBERLA_ASSERT_EQUAL( &block.getBlockStorage(), &(blockStorage.getBlockStorage()) );
+   WALBERLA_ASSERT_NOT_NULLPTR( boundaryHandlingPtr );
 
-   if( ( fixedBodiesOnly && !body->isFixed() ) /*|| ( moBodiesOnly && !isMOBody( body ) )*/ )
-      return;
-
-   BoundaryHandling_T * boundaryHandling = block.getData< BoundaryHandling_T >( boundaryHandlingID );
-   auto *               flagField        = boundaryHandling->getFlagField();
+   auto * flagField = boundaryHandlingPtr->getFlagField();
 
-   WALBERLA_ASSERT_NOT_NULLPTR( boundaryHandling );
    WALBERLA_ASSERT_NOT_NULLPTR( flagField );
    WALBERLA_ASSERT( flagField->flagExists( obstacle ) );
 
@@ -62,10 +54,12 @@ void mapBody( const pe::BodyID & body, IBlock & block, const shared_ptr<Structur
 
    CellInterval cellBB = getCellBB( body, block, blockStorage, flagField->nrOfGhostLayers() );
 
-   Vector3<real_t> startCellCenter = blockStorage->getBlockLocalCellCenter( block, cellBB.min() );
-   const real_t dx = blockStorage->dx( blockStorage->getLevel(block) );
-   const real_t dy = blockStorage->dy( blockStorage->getLevel(block) );
-   const real_t dz = blockStorage->dz( blockStorage->getLevel(block) );
+   if( cellBB.empty() ) return;
+
+   Vector3<real_t> startCellCenter = blockStorage.getBlockLocalCellCenter( block, cellBB.min() );
+   const real_t dx = blockStorage.dx( blockStorage.getLevel(block) );
+   const real_t dy = blockStorage.dy( blockStorage.getLevel(block) );
+   const real_t dz = blockStorage.dz( blockStorage.getLevel(block) );
 
    real_t cz = startCellCenter[2];
    for( cell_idx_t z = cellBB.zMin(); z <= cellBB.zMax(); ++z )
@@ -77,58 +71,119 @@ void mapBody( const pe::BodyID & body, IBlock & block, const shared_ptr<Structur
          for( cell_idx_t x = cellBB.xMin(); x <= cellBB.xMax(); ++x )
          {
             if( body->containsPoint(cx,cy,cz) )
-               boundaryHandling->forceBoundary( obstacleFlag, x, y, z );
+               boundaryHandlingPtr->forceBoundary( obstacleFlag, x, y, z );
             cx += dx;
          }
          cy += dy;
       }
       cz += dz;
    }
+}
+
+template< typename BoundaryHandling_T >
+void mapBody( const pe::BodyID & body, IBlock & block, StructuredBlockStorage & blockStorage,
+              const BlockDataID & boundaryHandlingID, const FlagUID & obstacle )
+{
+   WALBERLA_ASSERT_EQUAL( &block.getBlockStorage(), &(blockStorage.getBlockStorage()) );
 
+   BoundaryHandling_T * boundaryHandling = block.getData< BoundaryHandling_T >( boundaryHandlingID );
 
+   mapBody(body, block, blockStorage, boundaryHandling, obstacle );
 }
 
 
 
+// mapping function to map all bodies from the body storage - with certain properties - onto all blocks
 template< typename BoundaryHandling_T >
-void mapBodies( const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, const BlockDataID & bodyStorageID, const FlagUID & obstacle,
+void mapBodies( StructuredBlockStorage & blockStorage, const BlockDataID & boundaryHandlingID,
+                const BlockDataID & bodyStorageID, const FlagUID & obstacle,
                 const bool fixedBodiesOnly = true, const bool moBodiesOnly = true )
 {
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
       for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-         mapBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle, fixedBodiesOnly, moBodiesOnly );
+      {
+         WALBERLA_UNUSED(moBodiesOnly); // undo when other coupling algorithms are available
+
+         if( fixedBodiesOnly && bodyIt->isFixed() )
+            continue;
+
+         mapBody<BoundaryHandling_T>( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle );
+      }
    }
 }
 
+
+// mapping function to map all global bodies - with certain properties - onto all blocks
 template< typename BoundaryHandling_T >
-void mapGlobalBodies( const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle,
+void mapGlobalBodies( StructuredBlockStorage & blockStorage, const BlockDataID & boundaryHandlingID,
+                      pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle,
                       const bool fixedBodiesOnly = true, const bool moBodiesOnly = true )
 {
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
-      for( auto bodyIt = globalBodyStorage.begin(); bodyIt != globalBodyStorage.end(); ++bodyIt)
+      for( auto bodyIt = globalBodyStorage.begin(); bodyIt != globalBodyStorage.end(); ++bodyIt )
       {
-         mapBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle, fixedBodiesOnly, moBodiesOnly );
+         WALBERLA_UNUSED(moBodiesOnly); // undo when other coupling algorithms are available
+
+         if( fixedBodiesOnly && bodyIt->isFixed() )
+            continue;
+
+         mapBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle );
       }
    }
 }
 
+// mapping function to map a given single global body onto all blocks
 template< typename BoundaryHandling_T >
 void mapGlobalBody( const id_t globalBodySystemID,
-                    const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle,
-                    const bool fixedBodiesOnly = true, const bool moBodiesOnly = true )
+                    StructuredBlockStorage & blockStorage, const BlockDataID & boundaryHandlingID,
+                    pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle )
 {
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
       auto bodyIt = globalBodyStorage.find( globalBodySystemID );
       if( bodyIt != globalBodyStorage.end() )
       {
-         mapBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle, fixedBodiesOnly, moBodiesOnly );
+         mapBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle );
       }
    }
 }
 
 
+// mapping function to map all global bodies - with certain properties - onto a given single block
+template< typename BoundaryHandling_T >
+void mapGlobalBodiesOnBlock( IBlock & block,
+                             StructuredBlockStorage & blockStorage, BoundaryHandling_T * boundaryHandlingPtr,
+                             pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle,
+                             const bool fixedBodiesOnly = true, const bool moBodiesOnly = true )
+{
+   for( auto bodyIt = globalBodyStorage.begin(); bodyIt != globalBodyStorage.end(); ++bodyIt)
+   {
+      WALBERLA_UNUSED(moBodiesOnly); // undo when other coupling algorithms are available
+
+      if( fixedBodiesOnly && bodyIt->isFixed() )
+         continue;
+
+      mapBody< BoundaryHandling_T >( *bodyIt, block, blockStorage, boundaryHandlingPtr, obstacle );
+   }
+}
+
+
+// mapping function to map a given single global body onto a given single block
+template< typename BoundaryHandling_T >
+void mapGlobalBodyOnBlock( const id_t globalBodySystemID, IBlock & block,
+                           StructuredBlockStorage & blockStorage, BoundaryHandling_T * boundaryHandlingPtr,
+                           pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle )
+{
+   auto bodyIt = globalBodyStorage.find( globalBodySystemID );
+   if( bodyIt != globalBodyStorage.end() )
+   {
+      mapBody< BoundaryHandling_T >( *bodyIt, block, blockStorage, boundaryHandlingPtr, obstacle );
+   }
+
+}
+
+
 } // namespace pe_coupling
 } // namespace walberla
diff --git a/src/pe_coupling/momentum_exchange_method/BodyMapping.h b/src/pe_coupling/momentum_exchange_method/BodyMapping.h
index 732b31fe67bc483c3a6e52ad6daf85560c3fc726..77d5cf401ce455d5b936a77031214beb62a73d17 100644
--- a/src/pe_coupling/momentum_exchange_method/BodyMapping.h
+++ b/src/pe_coupling/momentum_exchange_method/BodyMapping.h
@@ -108,7 +108,7 @@ void BodyMapping< BoundaryHandling_T >::operator()( IBlock * const block ) const
 
       // policy: every body manages only its own flags
 
-      CellInterval cellBB = getCellBB( *bodyIt, *block, blockStorage_, flagField->nrOfGhostLayers() );
+      CellInterval cellBB = getCellBB( *bodyIt, *block, *blockStorage_, flagField->nrOfGhostLayers() );
 
       Vector3<real_t> startCellCenter = blockStorage_->getBlockLocalCellCenter( *block, cellBB.min() );
       const real_t dx = blockStorage_->dx( blockStorage_->getLevel(*block) );
@@ -174,12 +174,12 @@ void BodyMapping< BoundaryHandling_T >::operator()( IBlock * const block ) const
 ////////////////////////////
 
 template< typename BoundaryHandling_T >
-void mapMovingBody( const pe::BodyID body, IBlock & block, const shared_ptr<StructuredBlockStorage> & blockStorage,
+void mapMovingBody( const pe::BodyID body, IBlock & block, StructuredBlockStorage & blockStorage,
                     const BlockDataID & boundaryHandlingID, const BlockDataID & bodyFieldID, const FlagUID & obstacle )
 {
    typedef Field< pe::BodyID, 1 > BodyField_T;
 
-   WALBERLA_ASSERT_EQUAL( &block.getBlockStorage(), &(blockStorage->getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( &block.getBlockStorage(), &(blockStorage.getBlockStorage()) );
 
    if( body->isFixed() /*|| !body->isFinite()*/ )
       return;
@@ -198,10 +198,12 @@ void mapMovingBody( const pe::BodyID body, IBlock & block, const shared_ptr<Stru
 
    CellInterval cellBB = getCellBB( body, block, blockStorage, flagField->nrOfGhostLayers() );
 
-   Vector3<real_t> startCellCenter = blockStorage->getBlockLocalCellCenter( block, cellBB.min() );
-   const real_t dx = blockStorage->dx( blockStorage->getLevel(block) );
-   const real_t dy = blockStorage->dy( blockStorage->getLevel(block) );
-   const real_t dz = blockStorage->dz( blockStorage->getLevel(block) );
+   if( cellBB.empty() ) return;
+
+   Vector3<real_t> startCellCenter = blockStorage.getBlockLocalCellCenter( block, cellBB.min() );
+   const real_t dx = blockStorage.dx( blockStorage.getLevel(block) );
+   const real_t dy = blockStorage.dy( blockStorage.getLevel(block) );
+   const real_t dz = blockStorage.dz( blockStorage.getLevel(block) );
 
    real_t cz = startCellCenter[2];
    for( cell_idx_t z = cellBB.zMin(); z <= cellBB.zMax(); ++z )
@@ -230,10 +232,10 @@ void mapMovingBody( const pe::BodyID body, IBlock & block, const shared_ptr<Stru
 
 
 template< typename BoundaryHandling_T >
-void mapMovingBodies( const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, const BlockDataID & bodyStorageID,
+void mapMovingBodies( StructuredBlockStorage & blockStorage, const BlockDataID & boundaryHandlingID, const BlockDataID & bodyStorageID,
                       const BlockDataID & bodyFieldID, const FlagUID & obstacle )
 {
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
        for (auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt)
            mapMovingBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, bodyFieldID, obstacle );
@@ -241,10 +243,10 @@ void mapMovingBodies( const shared_ptr<StructuredBlockStorage> & blockStorage, c
 }
 
 template< typename BoundaryHandling_T >
-void mapMovingGlobalBodies( const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, pe::BodyStorage & globalBodyStorage,
-                            const BlockDataID & bodyFieldID, const FlagUID & obstacle )
+void mapMovingGlobalBodies( StructuredBlockStorage & blockStorage, const BlockDataID & boundaryHandlingID,
+                            pe::BodyStorage & globalBodyStorage, const BlockDataID & bodyFieldID, const FlagUID & obstacle )
 {
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
       for( auto bodyIt = globalBodyStorage.begin(); bodyIt != globalBodyStorage.end(); ++bodyIt)
       {
@@ -255,13 +257,13 @@ void mapMovingGlobalBodies( const shared_ptr<StructuredBlockStorage> & blockStor
 
 template< typename BoundaryHandling_T >
 void mapMovingGlobalBody( const id_t globalBodySystemID,
-                          const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, pe::BodyStorage & globalBodyStorage,
-                          const BlockDataID & bodyFieldID, const FlagUID & obstacle )
+                          StructuredBlockStorage & blockStorage, const BlockDataID & boundaryHandlingID,
+                          pe::BodyStorage & globalBodyStorage, const BlockDataID & bodyFieldID, const FlagUID & obstacle )
 {
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
       auto bodyIt = globalBodyStorage.find( globalBodySystemID );
-      if( bodyIt !=  globalBodyStorage.end() )
+      if( bodyIt != globalBodyStorage.end() )
       {
          mapMovingBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, bodyFieldID, obstacle );
       }
diff --git a/src/pe_coupling/momentum_exchange_method/boundary/CurvedLinear.h b/src/pe_coupling/momentum_exchange_method/boundary/CurvedLinear.h
index fd1f3d4b39855caee21eca2c8cb5f5dbd7a4cbde..b23b33c83ef134373c739a7234c75fbce4073363 100644
--- a/src/pe_coupling/momentum_exchange_method/boundary/CurvedLinear.h
+++ b/src/pe_coupling/momentum_exchange_method/boundary/CurvedLinear.h
@@ -64,8 +64,6 @@ namespace pe_coupling {
 template< typename LatticeModel_T, typename FlagField_T >
 class CurvedLinear : public Boundary< typename FlagField_T::flag_t >
 {
-   static_assert( LatticeModel_T::compressible == false,                                                           "Only works with incompressible models!" );
-
    typedef lbm::PdfField< LatticeModel_T >   PDFField_T;
    typedef typename LatticeModel_T::Stencil  Stencil_T;
    typedef typename FlagField_T::flag_t      flag_t;
@@ -170,6 +168,9 @@ inline void CurvedLinear< LatticeModel_T, FlagField_T >::treatDirection( const c
    // depending on the implementation for the specific body, either an analytical formula (e.g. for the sphere) or a line search algorithm is used
    real_t delta = lbm::intersectionRatio( body, cellCenter, direction, tolerance );
 
+   WALBERLA_ASSERT_LESS_EQUAL( delta, real_t(1));
+   WALBERLA_ASSERT_GREATER_EQUAL( delta, real_t(0));
+
    real_t pdf_new = real_t(0);
    real_t alpha = real_t(0);
 
@@ -178,7 +179,7 @@ inline void CurvedLinear< LatticeModel_T, FlagField_T >::treatDirection( const c
    const cell_idx_t yff = y - cell_idx_c( stencil::cy[ dir ] );
    const cell_idx_t zff = z - cell_idx_c( stencil::cz[ dir ] );
 
-   // check if this cell is a valid (i.e. fluid) cell
+   // check if CLI can be applied, i.e. the further-away-cell has to be a valid (i.e. fluid) cell
    if( flagField_->isPartOfMaskSet( xff, yff, zff, domainMask_ ) )
    {
       // apply CLI scheme
@@ -211,9 +212,21 @@ inline void CurvedLinear< LatticeModel_T, FlagField_T >::treatDirection( const c
    auto boundaryVelocity = body.velFromWF( boundaryPosition );
 
    // include effect of boundary velocity
-   pdf_new -= real_c(3.0) * alpha * LatticeModel_T::w[ Stencil_T::idx[dir] ] * ( real_c( stencil::cx[ dir ] ) * boundaryVelocity[0] +
-                                                                               real_c( stencil::cy[ dir ] ) * boundaryVelocity[1] +
-                                                                               real_c( stencil::cz[ dir ] ) * boundaryVelocity[2] );
+   if( LatticeModel_T::compressible )
+   {
+      const auto density  = pdfField_->getDensity(x,y,z);
+      pdf_new -= real_c(3.0) * alpha * density * LatticeModel_T::w[ Stencil_T::idx[dir] ] *
+                 ( real_c( stencil::cx[ dir ] ) * boundaryVelocity[0] +
+                   real_c( stencil::cy[ dir ] ) * boundaryVelocity[1] +
+                   real_c( stencil::cz[ dir ] ) * boundaryVelocity[2] );
+   }
+   else
+   {
+      pdf_new -= real_c(3.0) * alpha * LatticeModel_T::w[ Stencil_T::idx[dir] ] *
+                 ( real_c( stencil::cx[ dir ] ) * boundaryVelocity[0] +
+                   real_c( stencil::cy[ dir ] ) * boundaryVelocity[1] +
+                   real_c( stencil::cz[ dir ] ) * boundaryVelocity[2] );
+   }
 
    // carry out the boundary handling
    pdfField_->get( nx, ny, nz, Stencil_T::invDirIdx(dir) ) = pdf_new;
@@ -246,6 +259,7 @@ inline void CurvedLinear< LatticeModel_T, FlagField_T >::treatDirection( const c
    // add the force onto the body at the obstacle boundary
    body.addForceAtPos( force, boundaryPosition );
 
+   /*
    WALBERLA_LOG_DETAIL_SECTION() {
       std::stringstream ss;
       ss << "MOBoundary in cell <" << x << ", " << y << ", " << z << "> in dir <"
@@ -255,7 +269,7 @@ inline void CurvedLinear< LatticeModel_T, FlagField_T >::treatDirection( const c
          << ", with pdf_old=" << pdf_old << ", pdf_new=" << pdf_new;
       WALBERLA_LOG_DETAIL( ss.str() );
    }
-
+   */
 }
 
 } // namespace pe_coupling
diff --git a/src/pe_coupling/momentum_exchange_method/boundary/CurvedQuadratic.h b/src/pe_coupling/momentum_exchange_method/boundary/CurvedQuadratic.h
index fef3a0b6b96a33d48c550d646cd32921cf1c23d1..fa0b22852f6997a4ed9a4e7801ccc48222405c98 100644
--- a/src/pe_coupling/momentum_exchange_method/boundary/CurvedQuadratic.h
+++ b/src/pe_coupling/momentum_exchange_method/boundary/CurvedQuadratic.h
@@ -73,7 +73,6 @@ template< typename LatticeModel_T, typename FlagField_T >
 class CurvedQuadratic : public Boundary< typename FlagField_T::flag_t >
 {
    static_assert( (boost::is_same< typename LatticeModel_T::CollisionModel::tag, lbm::collision_model::TRT_tag >::value), "Only works with TRT!" ); // to access lambda_d
-   static_assert( LatticeModel_T::compressible == false,                                                                  "Only works with incompressible models!" );
 
    typedef lbm::PdfField< LatticeModel_T >   PDFField_T;
    typedef typename LatticeModel_T::Stencil  Stencil_T;
@@ -185,6 +184,9 @@ inline void CurvedQuadratic< LatticeModel_T, FlagField_T >::treatDirection( cons
    // depending on the implementation for the specific body, either an analytical formula (e.g. for the sphere) or a line search algorithm is used
    const real_t delta = lbm::intersectionRatio( body, cellCenter, direction, tolerance );
 
+   WALBERLA_ASSERT_LESS_EQUAL( delta, real_t(1));
+   WALBERLA_ASSERT_GREATER_EQUAL( delta, real_t(0));
+
    bool useMR1full = false;
 
    real_t pdf_new = real_c(0);
@@ -197,7 +199,7 @@ inline void CurvedQuadratic< LatticeModel_T, FlagField_T >::treatDirection( cons
 
    auto domainWithGhostlayer = pdfField_->xyzSizeWithGhostLayer();
 
-   // check if this cell is a valid (i.e. fluid) cell
+   // check if MR can be applied, i.e. the further-away-cell has to be a valid (i.e. fluid) cell
    if( flagField_->isPartOfMaskSet( xff, yff, zff, domainMask_ ) )
    {
       // MR1 scheme can be applied
@@ -320,9 +322,21 @@ inline void CurvedQuadratic< LatticeModel_T, FlagField_T >::treatDirection( cons
    auto boundaryVelocity = body.velFromWF( boundaryPosition );
 
    // include effect of boundary velocity
-   pdf_new -= real_c(3.0) * alpha * LatticeModel_T::w[ Stencil_T::idx[dir] ] * ( real_c( stencil::cx[ dir ] ) * boundaryVelocity[0] +
-                                                                               real_c( stencil::cy[ dir ] ) * boundaryVelocity[1] +
-                                                                               real_c( stencil::cz[ dir ] ) * boundaryVelocity[2] );
+   if( LatticeModel_T::compressible )
+   {
+      const auto density  = pdfField_->getDensity(x,y,z);
+      pdf_new -= real_c(3.0) * alpha * density * LatticeModel_T::w[ Stencil_T::idx[dir] ] *
+                 ( real_c( stencil::cx[ dir ] ) * boundaryVelocity[0] +
+                   real_c( stencil::cy[ dir ] ) * boundaryVelocity[1] +
+                   real_c( stencil::cz[ dir ] ) * boundaryVelocity[2] );
+   }
+   else
+   {
+      pdf_new -= real_c(3.0) * alpha * LatticeModel_T::w[ Stencil_T::idx[dir] ] *
+                 ( real_c( stencil::cx[ dir ] ) * boundaryVelocity[0] +
+                   real_c( stencil::cy[ dir ] ) * boundaryVelocity[1] +
+                   real_c( stencil::cz[ dir ] ) * boundaryVelocity[2] );
+   }
 
    // carry out the boundary handling
    pdfField_->get( nx, ny, nz, Stencil_T::invDirIdx(dir) ) = pdf_new;
@@ -355,6 +369,7 @@ inline void CurvedQuadratic< LatticeModel_T, FlagField_T >::treatDirection( cons
    // add the force onto the body at the obstacle boundary
    body.addForceAtPos( force, boundaryPosition );
 
+   /*
    WALBERLA_LOG_DETAIL_SECTION() {
       std::stringstream ss;
       ss << "MOBoundary in cell <" << x << ", " << y << ", " << z << "> in dir <"
@@ -364,7 +379,7 @@ inline void CurvedQuadratic< LatticeModel_T, FlagField_T >::treatDirection( cons
          << ", with pdf_old=" << pdf_old << ", pdf_new=" << pdf_new;
       WALBERLA_LOG_DETAIL( ss.str() );
    }
-
+   */
 }
 
 } // namespace pe_coupling
diff --git a/src/pe_coupling/momentum_exchange_method/boundary/SimpleBB.h b/src/pe_coupling/momentum_exchange_method/boundary/SimpleBB.h
index 0c7d3aae8c5b7352eb6426ef87d54e0986f54e1b..cfe78d575837e17f1f6adc63179bf48a5a3ab93b 100644
--- a/src/pe_coupling/momentum_exchange_method/boundary/SimpleBB.h
+++ b/src/pe_coupling/momentum_exchange_method/boundary/SimpleBB.h
@@ -217,7 +217,7 @@ inline void SimpleBB< LatticeModel_T, FlagField_T >::treatDirection( const cell_
    // add the force onto the body at the obstacle boundary
    body.addForceAtPos( force, boundaryPosition );
 
-
+   /*
    WALBERLA_LOG_DETAIL_SECTION() {
       std::stringstream ss;
       ss << "MOBoundary in cell <" << x << ", " << y << ", " << z << "> in dir <"
@@ -227,6 +227,7 @@ inline void SimpleBB< LatticeModel_T, FlagField_T >::treatDirection( const cell_
          << ", with pdf_old=" << pdf_old << ", pdf_new=" << pdf_new;
       WALBERLA_LOG_DETAIL( ss.str() );
    }
+   */
 }
 
 } // namespace pe_coupling
diff --git a/src/pe_coupling/momentum_exchange_method/restoration/PDFReconstruction.h b/src/pe_coupling/momentum_exchange_method/restoration/PDFReconstruction.h
index 9d751bf331336010acfab7c74f1f8a602449f841..f7ca0d80a2b7a29ea0dd4159b7865ac8cc211b34 100644
--- a/src/pe_coupling/momentum_exchange_method/restoration/PDFReconstruction.h
+++ b/src/pe_coupling/momentum_exchange_method/restoration/PDFReconstruction.h
@@ -125,7 +125,7 @@ void PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructer_T >
          if( bodyIt->isFixed() )
             continue;
 
-         CellInterval cellBB = getCellBB( *bodyIt, *block, blockStorage_, numberOfGhostLayersToInclude );
+         CellInterval cellBB = getCellBB( *bodyIt, *block, *blockStorage_, numberOfGhostLayersToInclude );
 
          for( auto cellIt = cellBB.begin(); cellIt != cellBB.end(); ++cellIt )
          {
@@ -160,7 +160,7 @@ void PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructer_T >
          if( bodyIt->isFixed() )
             continue;
 
-         CellInterval cellBB = getCellBB( *bodyIt, *block, blockStorage_, numberOfGhostLayersToInclude );
+         CellInterval cellBB = getCellBB( *bodyIt, *block, *blockStorage_, numberOfGhostLayersToInclude );
 
          for( cell_idx_t z = cellBB.zMin(); z <= cellBB.zMax(); ++z )
          {
diff --git a/src/pe_coupling/momentum_exchange_method/restoration/Reconstructor.h b/src/pe_coupling/momentum_exchange_method/restoration/Reconstructor.h
index 8acb17a1e97fb993156de1050e72ac25dc89ba85..7f9ce1017f6cc13c703ee18711c792b1d07f8c85 100644
--- a/src/pe_coupling/momentum_exchange_method/restoration/Reconstructor.h
+++ b/src/pe_coupling/momentum_exchange_method/restoration/Reconstructor.h
@@ -263,7 +263,7 @@ public:
    typedef lbm::PdfField< LatticeModel_T > PdfField_T;
    typedef Field< pe::BodyID, 1 >          BodyField_T;
 
-   ExtrapolationReconstructor( const shared_ptr<StructuredBlockStorage> blockStorage, const BlockDataID & boundaryHandlingID,
+   ExtrapolationReconstructor( const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID,
                                const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID,
                                const ExtrapolationDirectionFinder_T & extrapolationDirectionFinder,
                                const bool & enforceNoSlipConstraintAfterExtrapolation = false )
diff --git a/src/pe_coupling/partially_saturated_cells_method/BodyAndVolumeFractionMapping.h b/src/pe_coupling/partially_saturated_cells_method/BodyAndVolumeFractionMapping.h
index 88c1092f193c2e664a1f4ed0441eb489ff2c7c9a..1e461ebbd57c0ddc48e9027d090e66eee0b1d142 100644
--- a/src/pe_coupling/partially_saturated_cells_method/BodyAndVolumeFractionMapping.h
+++ b/src/pe_coupling/partially_saturated_cells_method/BodyAndVolumeFractionMapping.h
@@ -45,7 +45,7 @@ namespace pe_coupling {
  * As several bodies could intersect with one cell, the pairs are stored in a vector with the size of the amount of intersecting bodies.
  *
  */
-void mapPSMBodyAndVolumeFraction( const pe::BodyID body, IBlock & block, const shared_ptr<StructuredBlockStorage> & blockStorage,
+void mapPSMBodyAndVolumeFraction( const pe::BodyID body, IBlock & block, StructuredBlockStorage & blockStorage,
                                   const BlockDataID bodyAndVolumeFractionFieldID )
 {
    typedef std::pair< pe::BodyID, real_t >                              BodyAndVolumeFraction_T;
@@ -63,9 +63,9 @@ void mapPSMBodyAndVolumeFraction( const pe::BodyID body, IBlock & block, const s
 
       // get the cell's center
       Vector3<real_t> cellCenter;
-      cellCenter = blockStorage->getBlockLocalCellCenter( block, cell );
+      cellCenter = blockStorage.getBlockLocalCellCenter( block, cell );
 
-      const real_t fraction = overlapFractionPe( *body, cellCenter, blockStorage->dx( blockStorage->getLevel( block ) ) );
+      const real_t fraction = overlapFractionPe( *body, cellCenter, blockStorage.dx( blockStorage.getLevel( block ) ) );
 
       // if the cell intersected with the body, store a pointer to that body and the corresponding volume fraction in the field
       if( fraction > real_t(0) )
@@ -100,7 +100,7 @@ public:
    typedef GhostLayerField< std::vector< BodyAndVolumeFraction_T >, 1 > BodyAndVolumeFractionField_T;
 
    BodyAndVolumeFractionMapping( const shared_ptr<StructuredBlockStorage> & blockStorage,
-                                 const shared_ptr<pe::BodyStorage> globalBodyStorage,
+                                 const shared_ptr<pe::BodyStorage> & globalBodyStorage,
                                  const BlockDataID & bodyStorageID,
                                  const BlockDataID & bodyAndVolumeFractionFieldID,
                                  const bool mapFixedBodies = false,
@@ -180,7 +180,7 @@ void BodyAndVolumeFractionMapping::initialize()
          // only PSM and finite bodies (no planes, etc.) are mapped
          if( /*!isPSMBody( *bodyIt ) ||*/ ( bodyIt->isFixed() && !mapFixed_ ) )
             continue;
-         mapPSMBodyAndVolumeFraction( *bodyIt, *blockIt, blockStorage_, bodyAndVolumeFractionFieldID_ );
+         mapPSMBodyAndVolumeFraction( *bodyIt, *blockIt, *blockStorage_, bodyAndVolumeFractionFieldID_ );
          lastUpdatedPositionMap_.insert( std::pair< walberla::id_t, Vector3< real_t > >( bodyIt->getSystemID(), bodyIt->getPosition() ) );
       }
 
@@ -188,7 +188,7 @@ void BodyAndVolumeFractionMapping::initialize()
       {
          for( auto globalBodyIt = globalBodyStorage_->begin(); globalBodyIt != globalBodyStorage_->end(); ++globalBodyIt )
          {
-            mapPSMBodyAndVolumeFraction( *globalBodyIt, *blockIt, blockStorage_, bodyAndVolumeFractionFieldID_ );
+            mapPSMBodyAndVolumeFraction( *globalBodyIt, *blockIt, *blockStorage_, bodyAndVolumeFractionFieldID_ );
          }
       }
    }
@@ -252,7 +252,7 @@ void BodyAndVolumeFractionMapping::updatePSMBodyAndVolumeFraction( const pe::Bod
    }
 
    // get bounding box of body
-   CellInterval cellBB = getCellBB( body, block, blockStorage_, oldBodyAndVolumeFractionField->nrOfGhostLayers() );
+   CellInterval cellBB = getCellBB( body, block, *blockStorage_, oldBodyAndVolumeFractionField->nrOfGhostLayers() );
 
    // if body has not moved (specified by some epsilon), just reuse old fraction values
    if( body->getLinearVel().sqrLength()  < velocityUpdatingEpsilonSquared_ &&
@@ -315,7 +315,7 @@ void BodyAndVolumeFractionMapping::updateGlobalPSMBodyAndVolumeFraction( const p
    WALBERLA_ASSERT_NOT_NULLPTR( oldBodyAndVolumeFractionField );
 
    // get bounding box of body
-   CellInterval cellBB = getCellBB( body, block, blockStorage_, oldBodyAndVolumeFractionField->nrOfGhostLayers() );
+   CellInterval cellBB = getCellBB( body, block, *blockStorage_, oldBodyAndVolumeFractionField->nrOfGhostLayers() );
 
    // copy values of global body to new field
    for( auto cellIt = cellBB.begin(); cellIt != cellBB.end(); ++cellIt )
diff --git a/src/pe_coupling/partially_saturated_cells_method/PSMUtility.h b/src/pe_coupling/partially_saturated_cells_method/PSMUtility.h
index 526c0f994c3b631ebd55a792caaacbc20c024c6f..e39c3b7b227a252f5e45b96e23d591eb717ad2aa 100644
--- a/src/pe_coupling/partially_saturated_cells_method/PSMUtility.h
+++ b/src/pe_coupling/partially_saturated_cells_method/PSMUtility.h
@@ -46,7 +46,7 @@ template < typename LatticeModel_T, int Weighting_T >
 Vector3<real_t> getPSMMacroscopicVelocity( const IBlock & block,
                                            lbm::PdfField< LatticeModel_T > * pdfField,
                                            GhostLayerField< std::vector< std::pair< pe::BodyID, real_t > >, 1 > * bodyAndVolumeFractionField,
-                                           const shared_ptr<StructuredBlockStorage> & blockStorage,
+                                           StructuredBlockStorage & blockStorage,
                                            const Cell & cell )
 {
    static_assert( LatticeModel_T::compressible == false, "Only works with incompressible models!" );
@@ -60,7 +60,7 @@ Vector3<real_t> getPSMMacroscopicVelocity( const IBlock & block,
 
    for( auto bodyFracIt = bodyAndVolumeFractionField->get( cell ).begin(); bodyFracIt != bodyAndVolumeFractionField->get( cell ).end(); ++bodyFracIt )
    {
-      const Vector3< real_t > coordsCellCenter = blockStorage->getBlockLocalCellCenter( block, cell );
+      const Vector3< real_t > coordsCellCenter = blockStorage.getBlockLocalCellCenter( block, cell );
 
       const real_t eps = (*bodyFracIt).second;
 
@@ -89,7 +89,7 @@ Vector3<real_t> getPSMMacroscopicVelocity( const IBlock & block,
  * Only the velocity of cells intersecting with bodies is set, pure fluid cells remain unchanged.
  */
 template < typename LatticeModel_T, int Weighting_T >
-void initializeDomainForPSM( const shared_ptr<StructuredBlockStorage> & blockStorage,
+void initializeDomainForPSM( StructuredBlockStorage & blockStorage,
                              const BlockDataID & pdfFieldID, const BlockDataID & bodyAndVolumeFractionFieldID )
 {
    typedef lbm::PdfField< LatticeModel_T >                              PdfField_T;
@@ -97,7 +97,7 @@ void initializeDomainForPSM( const shared_ptr<StructuredBlockStorage> & blockSto
    typedef GhostLayerField< std::vector< BodyAndVolumeFraction_T >, 1 > BodyAndVolumeFractionField_T;
 
    // iterate all blocks with an iterator 'block'
-   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   for( auto blockIt = blockStorage.begin(); blockIt != blockStorage.end(); ++blockIt )
    {
       // get the field data out of the block
       PdfField_T* pdfField = blockIt->getData< PdfField_T > ( pdfFieldID );
@@ -115,7 +115,7 @@ void initializeDomainForPSM( const shared_ptr<StructuredBlockStorage> & blockSto
 
          for( auto bodyFracIt = bodyAndVolumeFractionField->get(cell).begin(); bodyFracIt != bodyAndVolumeFractionField->get(cell).end(); ++bodyFracIt )
          {
-            const Vector3< real_t > coordsCellCenter = blockStorage->getBlockLocalCellCenter( *blockIt, cell );
+            const Vector3< real_t > coordsCellCenter = blockStorage.getBlockLocalCellCenter( *blockIt, cell );
 
             const real_t eps = (*bodyFracIt).second;
 
diff --git a/src/pe_coupling/utility/LubricationCorrection.h b/src/pe_coupling/utility/LubricationCorrection.h
index 0903c3ffd0ed7b43f30caedb822bce9917b5a7b3..e64c74030b106e3bdb89edd306b6b02fd1f572d4 100644
--- a/src/pe_coupling/utility/LubricationCorrection.h
+++ b/src/pe_coupling/utility/LubricationCorrection.h
@@ -42,8 +42,8 @@ class LubricationCorrection
 public:
 
    // constructor
-   LubricationCorrection ( const shared_ptr<StructuredBlockStorage> & blockStorage, shared_ptr<pe::BodyStorage> globalBodyStorage, const BlockDataID & bodyStorageID,
-                           real_t dynamicViscosity, real_t cutOffDistance = real_t(2) / real_t(3) )
+   LubricationCorrection ( const shared_ptr<StructuredBlockStorage> & blockStorage, const shared_ptr<pe::BodyStorage> & globalBodyStorage,
+                           const BlockDataID & bodyStorageID, real_t dynamicViscosity, real_t cutOffDistance = real_t(2) / real_t(3) )
       : blockStorage_ ( blockStorage )
       , globalBodyStorage_( globalBodyStorage )
       , bodyStorageID_( bodyStorageID )
diff --git a/tests/pe_coupling/CMakeLists.txt b/tests/pe_coupling/CMakeLists.txt
index 44ce53a33729fb8fc68e13fd0dcb5650d1153aeb..8e9fc4f570a189ca3b94e4c2bea54eb2527c7529 100644
--- a/tests/pe_coupling/CMakeLists.txt
+++ b/tests/pe_coupling/CMakeLists.txt
@@ -89,13 +89,14 @@ waLBerla_execute_test( NAME DragForceSphereMEMRefinementSingleTest      COMMAND
 waLBerla_execute_test( NAME DragForceSphereMEMRefinementParallelTest    COMMAND $<TARGET_FILE:DragForceSphereMEMRefinement>                     PROCESSES 5 LABELS verylongrun CONFIGURATIONS Release RelWithDbgInfo )
 waLBerla_execute_test( NAME DragForceSphereMEMRefinementCLITest         COMMAND $<TARGET_FILE:DragForceSphereMEMRefinement> --MO_CLI            PROCESSES 4 LABELS verylongrun CONFIGURATIONS Release RelWithDbgInfo )
 
+waLBerla_compile_test( FILES momentum_exchange_method/GlobalBodyAsBoundaryMEMStaticRefinement.cpp DEPENDS blockforest pe timeloop )
+waLBerla_execute_test( NAME GlobalBodyAsBoundaryMEMStaticRefinementTest        COMMAND $<TARGET_FILE:GlobalBodyAsBoundaryMEMStaticRefinement>          PROCESSES 1 )
 
 waLBerla_compile_test( FILES momentum_exchange_method/LubricationCorrectionMEM.cpp DEPENDS blockforest timeloop )
 waLBerla_execute_test( NAME LubricationCorrectionMEMFuncTest         COMMAND $<TARGET_FILE:LubricationCorrectionMEM> --funcTest                   PROCESSES 3 )
 waLBerla_execute_test( NAME LubricationCorrectionMEMSphereSphereTest COMMAND $<TARGET_FILE:LubricationCorrectionMEM> --split --fzyx --sphSphTest  PROCESSES 6 LABELS longrun             CONFIGURATIONS Release RelWithDbgInfo )
 waLBerla_execute_test( NAME LubricationCorrectionMEMSphereWallTest   COMMAND $<TARGET_FILE:LubricationCorrectionMEM> --split --fzyx --sphWallTest PROCESSES 3 LABELS longrun verylongrun CONFIGURATIONS Release RelWithDbgInfo )
   
-  
 waLBerla_compile_test( FILES momentum_exchange_method/PeriodicParticleChannelMEM.cpp DEPENDS blockforest pe timeloop )
 waLBerla_execute_test( NAME PeriodicParticleChannelMEMTest COMMAND $<TARGET_FILE:PeriodicParticleChannelMEM> --shortrun PROCESSES 4 LABELS longrun )
 
@@ -115,7 +116,7 @@ waLBerla_execute_test( NAME SegreSilberbergMEMEanReconFuncTest    COMMAND $<TARG
 waLBerla_execute_test( NAME SegreSilberbergMEMEanReconTest        COMMAND $<TARGET_FILE:SegreSilberbergMEM> --MO_CLI --eanReconstructor   PROCESSES 18 LABELS verylongrun CONFIGURATIONS Release RelWithDbgInfo )
 waLBerla_execute_test( NAME SegreSilberbergMEMExtReconFuncTest    COMMAND $<TARGET_FILE:SegreSilberbergMEM> --funcTest --extReconstructor PROCESSES 9 )
 waLBerla_execute_test( NAME SegreSilberbergMEMExtReconTest        COMMAND $<TARGET_FILE:SegreSilberbergMEM> --MO_CLI --extReconstructor   PROCESSES 18 LABELS verylongrun CONFIGURATIONS Release RelWithDbgInfo )
-   
+
 waLBerla_compile_test( FILES momentum_exchange_method/TorqueSphereMEM.cpp DEPENDS blockforest pe timeloop )
 waLBerla_execute_test( NAME TorqueSphereMEMFuncTest        COMMAND $<TARGET_FILE:TorqueSphereMEM> --funcTest           PROCESSES 1 )
 waLBerla_execute_test( NAME TorqueSphereMEMSingleTest      COMMAND $<TARGET_FILE:TorqueSphereMEM>                      PROCESSES 1 LABELS longrun     CONFIGURATIONS Release RelWithDbgInfo )
diff --git a/tests/pe_coupling/momentum_exchange_method/BodyAtBlockBoarderCheck.cpp b/tests/pe_coupling/momentum_exchange_method/BodyAtBlockBoarderCheck.cpp
index 03cd4c395ae7b52c397c623b21cddf8aecb8f673..16e0afc6568ce793c9f0c72828d6c7d5338c7224 100644
--- a/tests/pe_coupling/momentum_exchange_method/BodyAtBlockBoarderCheck.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/BodyAtBlockBoarderCheck.cpp
@@ -354,7 +354,7 @@ int main( int argc, char **argv )
                                     MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID ), "boundary handling" );
 
    // initially map pe bodies into the LBM simulation
-   pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_Flag );
+   pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_Flag );
 
    ///////////////
    // TIME LOOP //
diff --git a/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEM.cpp b/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEM.cpp
index 095fed5f11f8dcc5d0ac7f1b40a88f6827d31821..581f51deb50c6cd19d5058327726cb468f6443d1 100644
--- a/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file DragForceSphereMEMPe.cpp
+//! \file DragForceSphereMEM.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -64,7 +64,7 @@
 #include <iomanip>
 #include <iostream>
 
-namespace drag_force_sphere_mem_pe
+namespace drag_force_sphere_mem
 {
 
 ///////////
@@ -527,13 +527,13 @@ int main( int argc, char **argv )
    if( method == MEMVariant::CLI )
    {
       // uses a higher order boundary condition (CLI)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
    }else if ( method == MEMVariant::MR ){
       // uses a higher order boundary condition (MR)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_MR_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_MR_Flag );
    }else{
       // uses standard bounce back boundary conditions
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
    }
 
    // since external forcing is applied, the evaluation of the velocity has to be carried out directly after the streaming step
@@ -608,8 +608,8 @@ int main( int argc, char **argv )
 
 }
 
-} //namespace drag_force_sphere_mem_pe
+} //namespace drag_force_sphere_mem
 
 int main( int argc, char **argv ){
-   drag_force_sphere_mem_pe::main(argc, argv);
+   drag_force_sphere_mem::main(argc, argv);
 }
diff --git a/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEMRefinement.cpp b/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEMRefinement.cpp
index 40fb413f62730015652120d39ecb2c747c261a7c..49ac5f453cf9daff48628f6e94394641de81be26 100644
--- a/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEMRefinement.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/DragForceSphereMEMRefinement.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file DragForceSphereMEMPeRefinement.cpp
+//! \file DragForceSphereMEMRefinement.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -71,7 +71,7 @@
 #include <iomanip>
 #include <iostream>
 
-namespace drag_force_sphere_mem_pe_refinement
+namespace drag_force_sphere_mem_refinement
 {
 
 ///////////
@@ -549,10 +549,10 @@ int main( int argc, char **argv )
    if( MO_CLI )
    {
       // uses a higher order boundary condition (CLI)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
    }else{
       // uses standard bounce back boundary conditions
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
    }
 
 
@@ -647,8 +647,8 @@ int main( int argc, char **argv )
 
 }
 
-} //namespace drag_force_sphere_mem_pe_refinement
+} //namespace drag_force_sphere_mem_refinement
 
 int main( int argc, char **argv ){
-   drag_force_sphere_mem_pe_refinement::main(argc, argv);
+   drag_force_sphere_mem_refinement::main(argc, argv);
 }
diff --git a/tests/pe_coupling/momentum_exchange_method/GlobalBodyAsBoundaryMEMStaticRefinement.cpp b/tests/pe_coupling/momentum_exchange_method/GlobalBodyAsBoundaryMEMStaticRefinement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f3d40bc3afe96a461c6ca930d9b89be61c17dc10
--- /dev/null
+++ b/tests/pe_coupling/momentum_exchange_method/GlobalBodyAsBoundaryMEMStaticRefinement.cpp
@@ -0,0 +1,459 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GlobalBodyAsBoundaryMEMStaticRefinement.cpp
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "boundary/all.h"
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/SharedFunctor.h"
+#include "core/debug/Debug.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/all.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#include "domain_decomposition/SharedSweep.h"
+
+#include "field/AddToStorage.h"
+#include "field/StabilityChecker.h"
+#include "field/communication/PackInfo.h"
+
+#include "lbm/boundary/all.h"
+#include "lbm/communication/PdfFieldPackInfo.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/field/PdfField.h"
+#include "lbm/lattice_model/D3Q19.h"
+#include "lbm/refinement/all.h"
+#include "lbm/sweeps/CellwiseSweep.h"
+#include "lbm/sweeps/SweepWrappers.h"
+
+#include "pe/basic.h"
+#include "pe/Types.h"
+
+#include "pe_coupling/mapping/all.h"
+#include "pe_coupling/momentum_exchange_method/all.h"
+#include "pe_coupling/utility/all.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "vtk/all.h"
+#include "field/vtk/all.h"
+#include "lbm/vtk/all.h"
+
+namespace global_body_as_boundary_mem_static_refinement
+{
+
+///////////
+// USING //
+///////////
+
+using namespace walberla;
+using walberla::uint_t;
+
+//////////////
+// TYPEDEFS //
+//////////////
+
+// PDF field, flag field & body field
+typedef lbm::D3Q19< lbm::collision_model::TRT, false >  LatticeModel_T;
+typedef LatticeModel_T::Stencil          Stencil_T;
+typedef lbm::PdfField< LatticeModel_T > PdfField_T;
+
+typedef walberla::uint8_t                 flag_t;
+typedef FlagField< flag_t >               FlagField_T;
+typedef GhostLayerField< pe::BodyID, 1 >  BodyField_T;
+
+const uint_t FieldGhostLayers = 4;
+
+// boundary handling
+typedef pe_coupling::SimpleBB< LatticeModel_T, FlagField_T > MO_SBB_T;
+
+typedef boost::tuples::tuple< MO_SBB_T > BoundaryConditions_T;
+typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
+
+typedef boost::tuple< pe::Plane > BodyTypeTuple;
+
+///////////
+// FLAGS //
+///////////
+
+const FlagUID Fluid_Flag( "fluid" );
+const FlagUID MO_SBB_Flag( "moving obstacle SBB" );
+
+/////////////////////
+// BLOCK STRUCTURE //
+/////////////////////
+
+static void refinementSelection( SetupBlockForest& forest, uint_t levels, AABB refinementBox )
+{
+   real_t dx = real_t(1); // dx on finest level
+   for( auto block = forest.begin(); block != forest.end(); ++block )
+   {
+      uint_t blockLevel = block->getLevel();
+      uint_t levelScalingFactor = ( uint_t(1) << (levels - uint_t(1) - blockLevel) );
+      real_t dxOnLevel = dx * real_c(levelScalingFactor);
+      AABB blockAABB = block->getAABB();
+
+      // extend block AABB by ghostlayers
+      AABB extendedBlockAABB = blockAABB.getExtended( dxOnLevel * real_c(FieldGhostLayers) );
+
+      if( extendedBlockAABB.intersects( refinementBox ) )
+         if( blockLevel < ( levels - uint_t(1) ) )
+            block->setMarker( true );
+   }
+}
+
+static void workloadAndMemoryAssignment( SetupBlockForest& forest )
+{
+   for( auto block = forest.begin(); block != forest.end(); ++block )
+   {
+      block->setWorkload( numeric_cast< workload_t >( uint_t(1) << block->getLevel() ) );
+      block->setMemory( numeric_cast< memory_t >(1) );
+   }
+}
+
+static shared_ptr< StructuredBlockForest > createBlockStructure( const AABB & domainAABB, Vector3<uint_t> blockSizeInCells,
+                                                                 uint_t numberOfLevels, bool keepGlobalBlockInformation = false )
+{
+   SetupBlockForest sforest;
+
+   Vector3<uint_t> numberOfFineBlocksPerDirection( uint_c(domainAABB.size(0)) / blockSizeInCells[0],
+                                                   uint_c(domainAABB.size(1)) / blockSizeInCells[1],
+                                                   uint_c(domainAABB.size(2)) / blockSizeInCells[2] );
+
+   for(uint_t i = 0; i < 3; ++i )
+   {
+      WALBERLA_CHECK_EQUAL( numberOfFineBlocksPerDirection[i] * blockSizeInCells[i], uint_c(domainAABB.size(i)),
+                            "Domain can not be decomposed in direction " << i << " into fine blocks of size " << blockSizeInCells[i] );
+   }
+
+   uint_t levelScalingFactor = ( uint_t(1) << ( numberOfLevels - uint_t(1) ) );
+   Vector3<uint_t> numberOfCoarseBlocksPerDirection( numberOfFineBlocksPerDirection / levelScalingFactor );
+
+   for(uint_t i = 0; i < 3; ++i )
+   {
+      WALBERLA_CHECK_EQUAL(numberOfCoarseBlocksPerDirection[i] * levelScalingFactor, numberOfFineBlocksPerDirection[i],
+                            "Domain can not be refined in direction " << i << " according to the specified number of levels!" );
+   }
+
+
+   // refinement box is in the left lower corner of the domain
+   AABB refinementBox( domainAABB.xMin(), domainAABB.yMin(), domainAABB.zMin(),
+                       domainAABB.xMin()+real_t(1), domainAABB.yMin()+real_t(1), domainAABB.zMin()+real_t(1) );
+
+   WALBERLA_LOG_INFO_ON_ROOT(" - refinement box: " << refinementBox);
+
+   sforest.addRefinementSelectionFunction( boost::bind( refinementSelection, _1, numberOfLevels, refinementBox ) );
+   sforest.addWorkloadMemorySUIDAssignmentFunction( workloadAndMemoryAssignment );
+
+   sforest.init( domainAABB, numberOfCoarseBlocksPerDirection[0], numberOfCoarseBlocksPerDirection[1], numberOfCoarseBlocksPerDirection[2], false, false, false );
+
+   // calculate process distribution
+   const memory_t memoryLimit = math::Limits< memory_t >::inf();
+
+   sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(true), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true );
+
+   WALBERLA_LOG_INFO_ON_ROOT( sforest );
+
+   MPIManager::instance()->useWorldComm();
+
+   // create StructuredBlockForest (encapsulates a newly created BlockForest)
+   shared_ptr< StructuredBlockForest > sbf =
+         make_shared< StructuredBlockForest >( make_shared< BlockForest >( uint_c( MPIManager::instance()->rank() ), sforest, keepGlobalBlockInformation ),
+                                               blockSizeInCells[0], blockSizeInCells[1], blockSizeInCells[2]);
+   sbf->createCellBoundingBoxes();
+
+   return sbf;
+}
+
+/////////////////////////////////////
+// BOUNDARY HANDLING CUSTOMIZATION //
+/////////////////////////////////////
+class MyBoundaryHandling
+{
+public:
+
+   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID ) :
+      flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID ) {}
+
+   BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const;
+
+private:
+
+   const BlockDataID flagFieldID_;
+   const BlockDataID pdfFieldID_;
+   const BlockDataID bodyFieldID_;
+
+
+}; // class MyBoundaryHandling
+
+
+BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const StructuredBlockStorage * const storage ) const
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( block );
+   WALBERLA_ASSERT_NOT_NULLPTR( storage );
+
+   FlagField_T * flagField       = block->getData< FlagField_T >( flagFieldID_ );
+   PdfField_T *  pdfField        = block->getData< PdfField_T > ( pdfFieldID_ );
+   BodyField_T * bodyField       = block->getData< BodyField_T >( bodyFieldID_ );
+
+   const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
+
+   BoundaryHandling_T * handling = new BoundaryHandling_T( "moving obstacle boundary handling", flagField, fluid,
+         boost::tuples::make_tuple( MO_SBB_T( "MO_SBB", MO_SBB_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ) );
+
+   // boundary conditions are set by mapping the (moving) planes into the domain
+
+   handling->fillWithDomain( FieldGhostLayers );
+
+   return handling;
+}
+//*******************************************************************************************************************
+
+
+
+//////////
+// MAIN //
+//////////
+
+//*******************************************************************************************************************
+/*!\brief Short test case that simulates a Lid driven cavity-like setup to check refinement near global bodies.
+ *
+ * The boundaries are set by mapping PE planes into the domain.
+ * The top plane is moving with a given velocity.
+ * Static grid refinement is applied in the left part of the domain, resulting in level boarders along the planes.
+ *
+ * This test case is used to check the applicability and correctness of the boundary conditions used in the
+ * momentum exchange method (currently SBB) for usage along global bodies with level boarders.
+ *
+ * The domain is on purpose very small ( 2x1x1 coarse blocks ), to facilitate debugging.
+ *
+ * Special care has to be taken when refinement boundaries are on the same PE body:
+ *
+ *  - the mapping has to be consistent on the coarse and the fine level,
+ *    i.e. a boundary cell on the coarse level has to be 8 boundary cells on the fine level.
+ *    This implies that only cell-aligned and axis-aligned PE bodies are allowed to have this refinement boundary.
+ *    All others (inclindes planes, cylinders, spheres) must have and maintain the same refinement level
+ *    throughout the entire simulation.
+ *
+ *  - boundary conditions that access PDF values from cells apart from the near-boundary cell
+ *    (usually higher order BCs like CLI) can usually not be used.
+ *    Communication patterns in LBM with refinement are complex (see Schornbaum, Ruede - "Massively Parallel Algorithms
+ *    for the Lattice Boltzmann Method on NonUniform Grids" (2016)). Thus accesses to ghost layer PDF values that
+ *    happen in those BCs are highly dangerous since those are often given NaN values.
+ *    Problems are here two-fold:
+ *     - boundary handling on fine levels is carried out in two of the four ghost layers. This is done tice on the
+ *       finer levels and in this second time, the third and fourth ghost layer feature NaN values only
+ *       (due to the swap during the streaming step).
+ *     - on coarse blocks, the ghost layer at the coarse-fine boundary does not contain valid PDF values since the
+ *       proceeding fine-to-coarse communication sets the PDF values directly inside the coarse block. They are
+ *       therefore also not usable for the BC.
+ *
+ * Due to these restrictions, it is currently only possible to use SimpleBB in those cases.
+ * Note, however, that this restriction drops if the a constant block level is maintained along a specific global body
+ * throughout the simulation.
+ *
+ * Run this test with debug functionality to switch on the NaN checks in the refinement functions.
+ *
+ */
+//*******************************************************************************************************************
+
+int main( int argc, char **argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+   ///////////////////
+   // Customization //
+   ///////////////////
+
+   //logging::Logging::instance()->setLogLevel(logging::Logging::DETAIL);
+
+   bool vtkIO = false;
+
+   for( int i = 1; i < argc; ++i )
+   {
+      if( std::strcmp( argv[i], "--vtkIO" ) == 0 ) { vtkIO = true; continue; }
+      WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
+   }
+
+   //////////////////////////
+   // NUMERICAL PARAMETERS //
+   //////////////////////////
+
+   Vector3<uint_t> domainSize( 32, 16, 16 );
+
+   const uint_t numberOfLevels = uint_t(2);
+   const real_t relaxationTime = real_t(1);
+   const real_t wallVelocity = real_t(0.01);
+
+   std::string baseFolder = "vtk_out";
+
+   ///////////////////////////
+   // BLOCK STRUCTURE SETUP //
+   ///////////////////////////
+
+   const uint_t finestLevel = numberOfLevels - uint_t(1);
+   const uint_t levelScalingFactor = ( uint_t(1) << finestLevel );
+
+   const uint_t timesteps = uint_t(5);
+
+   Vector3<uint_t> coarseBlocksPerDirection( 2, 1, 1 );
+   Vector3<uint_t> blockSizeInCells(domainSize[0] / ( coarseBlocksPerDirection[0] * levelScalingFactor ),
+                                    domainSize[1] / ( coarseBlocksPerDirection[1] * levelScalingFactor ),
+                                    domainSize[2] / ( coarseBlocksPerDirection[2] * levelScalingFactor ) );
+
+   AABB simulationDomain( real_t(0), real_t(0), real_t(0), real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) );
+   auto blocks = createBlockStructure( simulationDomain, blockSizeInCells, numberOfLevels );
+
+   //write domain decomposition to file
+   if( vtkIO )
+   {
+      vtk::writeDomainDecomposition( blocks, "domain_decomposition", baseFolder );
+   }
+
+
+   /////////////////
+   // PE COUPLING //
+   /////////////////
+
+   // set up pe functionality
+   shared_ptr<pe::BodyStorage>  globalBodyStorage = make_shared<pe::BodyStorage>();
+   pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
+
+   // create pe bodies
+
+   // bounding planes (global)
+   const auto planeMaterial = pe::createMaterial( "myPlaneMat", real_t(8920), real_t(0), real_t(1), real_t(1), real_t(0), real_t(1), real_t(1), real_t(0), real_t(0) );
+
+   // planes in E and W direction
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(1,0,0), Vector3<real_t>(0,0,0), planeMaterial );
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-1,0,0), Vector3<real_t>(real_c(domainSize[0]),0,0), planeMaterial );
+
+   // planes in S and N direction
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,1,0), Vector3<real_t>(0,0,0), planeMaterial );
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-1,0), Vector3<real_t>(0,real_c(domainSize[1]),0), planeMaterial );
+
+   // planes in B and T direction
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), planeMaterial );
+   auto topPlane = pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,real_c(domainSize[2])), planeMaterial );
+   topPlane->setLinearVel(wallVelocity, real_t(0), real_t(0));
+
+   ///////////////////////
+   // ADD DATA TO BLOCKS //
+   ////////////////////////
+
+   // create the lattice model
+   LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( real_t(1) / relaxationTime, lbm::collision_model::TRT::threeSixteenth, finestLevel ) );
+
+   // add PDF field
+   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel,
+                                                                         Vector3< real_t >( real_t(0) ), real_t(1),
+                                                                         FieldGhostLayers, field::zyxf );
+
+   // add flag field
+   BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field", FieldGhostLayers );
+
+   // add body field
+   BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", NULL, field::zyxf, FieldGhostLayers );
+
+   // add boundary handling
+   BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >( MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID ), "boundary handling" );
+
+   // map planes into the LBM simulation -> act as no-slip boundaries
+   pe_coupling::mapMovingGlobalBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, *globalBodyStorage, bodyFieldID, MO_SBB_Flag );
+
+
+   ///////////////
+   // TIME LOOP //
+   ///////////////
+
+   // setup of the LBM communication for synchronizing the pdf field between neighboring blocks
+   boost::function< void () > commFunction;
+   blockforest::communication::UniformBufferedScheme< Stencil_T > scheme( blocks );
+   scheme.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldID ) );
+   commFunction = scheme;
+
+   // create the timeloop
+   SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
+
+   auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag );
+
+   auto refinementTimestep = lbm::refinement::makeTimeStep< LatticeModel_T, BoundaryHandling_T >( blocks, sweep, pdfFieldID, boundaryHandlingID );
+
+   if( vtkIO )
+   {
+      // flag field (written only once in the first time step, ghost layers are also written, written for each block separately)
+      auto flagFieldVTK = vtk::createVTKOutput_BlockData(blocks, "flag_field", uint_t(1), FieldGhostLayers, false,
+                                                         baseFolder, "simulation_step", false, true, true, false);
+      flagFieldVTK->addCellDataWriter(make_shared<field::VTKWriter<FlagField_T> >(flagFieldID, "FlagField"));
+      timeloop.addFuncBeforeTimeStep(vtk::writeFiles(flagFieldVTK), "VTK (flag field data)");
+
+      // pdf field
+      auto pdfFieldVTKfine = vtk::createVTKOutput_BlockData(blocks, "fluid_field_fine_steps", uint_t(1), FieldGhostLayers, false,
+                                                            baseFolder, "simulation_step", false, true, true, false);
+
+      pdfFieldVTKfine->addCellDataWriter( make_shared< field::VTKWriter<PdfField_T> >(pdfFieldID, "PdfField"));
+      pdfFieldVTKfine->addCellDataWriter( make_shared< lbm::VelocityVTKWriter< LatticeModel_T, float > >( pdfFieldID, "VelocityFromPDF" ) );
+      pdfFieldVTKfine->addCellDataWriter( make_shared< lbm::DensityVTKWriter < LatticeModel_T, float > >( pdfFieldID, "DensityFromPDF" ) );
+
+      refinementTimestep->addPostCollideVoidFunction( lbm::refinement::FunctorWrapper(vtk::writeFiles( pdfFieldVTKfine )), "VTK (fluid field data post collide)", finestLevel);
+      refinementTimestep->addPostBoundaryHandlingVoidFunction( lbm::refinement::FunctorWrapper(vtk::writeFiles( pdfFieldVTKfine )), "VTK (fluid field data post bh)", finestLevel);
+      refinementTimestep->addPostStreamVoidFunction( lbm::refinement::FunctorWrapper(vtk::writeFiles( pdfFieldVTKfine )), "VTK (fluid field data post stream)", finestLevel);
+
+
+      // pdf field
+      auto pdfFieldVTKcoarse = vtk::createVTKOutput_BlockData(blocks, "fluid_field_coarse_steps", uint_t(1), FieldGhostLayers, false,
+                                                              baseFolder, "simulation_step", false, true, true, false);
+
+      pdfFieldVTKcoarse->addCellDataWriter( make_shared< field::VTKWriter<PdfField_T> >(pdfFieldID, "PdfField"));
+      pdfFieldVTKcoarse->addCellDataWriter( make_shared< lbm::VelocityVTKWriter< LatticeModel_T, float > >( pdfFieldID, "VelocityFromPDF" ) );
+      pdfFieldVTKcoarse->addCellDataWriter( make_shared< lbm::DensityVTKWriter < LatticeModel_T, float > >( pdfFieldID, "DensityFromPDF" ) );
+
+      uint_t coarseLevel = uint_t(0);
+      refinementTimestep->addPostCollideVoidFunction( lbm::refinement::FunctorWrapper(vtk::writeFiles( pdfFieldVTKcoarse )), "VTK (fluid field data collide stream)", coarseLevel);
+      refinementTimestep->addPostBoundaryHandlingVoidFunction( lbm::refinement::FunctorWrapper(vtk::writeFiles( pdfFieldVTKcoarse )), "VTK (fluid field data post bh)", coarseLevel);
+      refinementTimestep->addPostStreamVoidFunction( lbm::refinement::FunctorWrapper(vtk::writeFiles( pdfFieldVTKcoarse )), "VTK (fluid field data post stream)", coarseLevel);
+
+   }
+
+   // add LBM sweep with refinement
+   timeloop.addFuncBeforeTimeStep( makeSharedFunctor( refinementTimestep ), "LBM refinement time step" );
+
+   ////////////////////////
+   // EXECUTE SIMULATION //
+   ////////////////////////
+
+   WcTimingPool timeloopTiming;
+   timeloop.run(timeloopTiming);
+   timeloopTiming.logResultOnRoot();
+
+   return EXIT_SUCCESS;
+}
+
+} // namespace global_body_as_boundary_mem_static_refinement
+
+int main( int argc, char **argv ){
+   global_body_as_boundary_mem_static_refinement::main(argc, argv);
+}
diff --git a/tests/pe_coupling/momentum_exchange_method/LubricationCorrectionMEM.cpp b/tests/pe_coupling/momentum_exchange_method/LubricationCorrectionMEM.cpp
index 89068030d31fc0fe2dd5bbd50302ece0b482c5ee..62a3cfd2c9b0aaa8aba1fab8b3b99dfdee1465c8 100644
--- a/tests/pe_coupling/momentum_exchange_method/LubricationCorrectionMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/LubricationCorrectionMEM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file LubricationCorrectionMEMPe.cpp
+//! \file LubricationCorrectionMEM.cpp
 //! \ingroup pe_coupling
 //! \author Kristina Pickl <kristina.pickl@fau.de>
 //! \author Dominik Bartuschat
@@ -58,7 +58,7 @@
 #include "pe_coupling/utility/all.h"
 
 
-namespace lubrication_correction_mem_pe
+namespace lubrication_correction_mem
 {
 
 ///////////
@@ -127,7 +127,7 @@ class EvaluateLubricationForce
 public:
    EvaluateLubricationForce( const shared_ptr< StructuredBlockStorage > & blocks,
                              const BlockDataID & bodyStorageID,
-                             pe::BodyStorage & globalBodyStorage,
+                             const shared_ptr<pe::BodyStorage> & globalBodyStorage,
                              uint_t id1, uint_t id2, pe::Vec3 vel, real_t nu_L, real_t radius,
                              SweepTimeloop* timeloop, bool print, bool sphSphTest, bool sphWallTest )
    : blocks_( blocks ), bodyStorageID_( bodyStorageID ), globalBodyStorage_( globalBodyStorage ),
@@ -286,7 +286,7 @@ private:
             pe::SphereID sphereI = ( *curSphereIt );
             if ( sphereI->getID() == id1_ )
             {
-               for( auto globalBodyIt = globalBodyStorage_.begin(); globalBodyIt != globalBodyStorage_.end(); ++globalBodyIt)
+               for( auto globalBodyIt = globalBodyStorage_->begin(); globalBodyIt != globalBodyStorage_->end(); ++globalBodyIt)
                {
                   if( globalBodyIt->getID() == id2_ )
                   {
@@ -368,7 +368,7 @@ private:
 
    shared_ptr< StructuredBlockStorage > blocks_;
    const BlockDataID bodyStorageID_;
-   pe::BodyStorage & globalBodyStorage_;
+   shared_ptr<pe::BodyStorage> globalBodyStorage_;
 
    uint_t         id1_;
    uint_t         id2_;
@@ -922,7 +922,7 @@ int main( int argc, char **argv )
    }
 
    // map pe bodies into the LBM simulation
-   pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_Flag );
+   pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_Flag );
 
    ///////////////
    // TIME LOOP //
@@ -970,7 +970,7 @@ int main( int argc, char **argv )
    timeloop.addFuncAfterTimeStep( pe_coupling::LubricationCorrection( blocks, globalBodyStorage, bodyStorageID, nu_L ), "Lubrication Force" );
 
    // perform lubrication evaluation
-   timeloop.addFuncAfterTimeStep( EvaluateLubricationForce( blocks, bodyStorageID, *globalBodyStorage, id1, id2, velocity,
+   timeloop.addFuncAfterTimeStep( EvaluateLubricationForce( blocks, bodyStorageID, globalBodyStorage, id1, id2, velocity,
                                                             nu_L, radius, &timeloop, fileIO, sphSphTest, sphWallTest ), "Evaluate Lubrication Force" );
 
    // reset the forces and apply a constant velocity
@@ -992,10 +992,10 @@ int main( int argc, char **argv )
 
    return 0;
 }
-} //namespace lubrication_correction_mem_pe
+} //namespace lubrication_correction_mem
 
 int main( int argc, char **argv ){
-   lubrication_correction_mem_pe::main(argc, argv);
+   lubrication_correction_mem::main(argc, argv);
 }
 
 
diff --git a/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp b/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp
index ba33e553f3dd9aec26dbda0701bde1db84e1033b..2e351fa7930d88f9c958c03b3c9dbdc375f5fd2b 100644
--- a/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp
@@ -502,10 +502,11 @@ int main( int argc, char **argv )
    // special care has to be taken here that only the global spheres (not the planes) are mapped since the planes would overwrite the already set boundary flags
    for( auto globalBodyIt = globalBodiesToBeMapped.begin(); globalBodyIt != globalBodiesToBeMapped.end(); ++globalBodyIt )
    {
-      pe_coupling::mapGlobalBody< BoundaryHandling_T >( *globalBodyIt, blocks, boundaryHandlingID, *globalBodyStorage, NoSlip_Flag, false );
+      pe_coupling::mapGlobalBody< BoundaryHandling_T >( *globalBodyIt, *blocks, boundaryHandlingID, *globalBodyStorage, NoSlip_Flag );
    }
+
    // moving bodies are handled by the momentum exchange method
-   pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_Flag );
+   pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_Flag );
 
    ///////////////
    // TIME LOOP //
diff --git a/tests/pe_coupling/momentum_exchange_method/SegreSilberbergMEM.cpp b/tests/pe_coupling/momentum_exchange_method/SegreSilberbergMEM.cpp
index 3bff481c7b2f27b128219a193d0874c63d30b895..055c68b0de9d8242d6eac6e3cff89f739958bffe 100644
--- a/tests/pe_coupling/momentum_exchange_method/SegreSilberbergMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/SegreSilberbergMEM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file SegreSilberbergMEMPe.cpp
+//! \file SegreSilberbergMEM.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -65,7 +65,7 @@
 #include "field/vtk/all.h"
 #include "lbm/vtk/all.h"
 
-namespace segre_silberberg_mem_pe
+namespace segre_silberberg_mem
 {
 
 ///////////
@@ -609,13 +609,13 @@ int main( int argc, char **argv )
    if( MO_CLI )
    {
       // uses a higher order boundary condition (CLI)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
    }else if ( MO_MR ){
       // uses a higher order boundary condition (MR)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_MR_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_MR_Flag );
    }else{
       // uses standard bounce back boundary conditions
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
    }
 
    ///////////////
@@ -820,8 +820,8 @@ int main( int argc, char **argv )
    return EXIT_SUCCESS;
 }
 
-} // namespace segre_silberberg_mem_pe
+} // namespace segre_silberberg_mem
 
 int main( int argc, char **argv ){
-   segre_silberberg_mem_pe::main(argc, argv);
+   segre_silberberg_mem::main(argc, argv);
 }
diff --git a/tests/pe_coupling/momentum_exchange_method/SquirmerTest.cpp b/tests/pe_coupling/momentum_exchange_method/SquirmerTest.cpp
index f0c248c1419f2ebaf1a4f2ae33736273dbae9509..5680cb5ab6956997d213badcca323c6004ca0fca 100644
--- a/tests/pe_coupling/momentum_exchange_method/SquirmerTest.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/SquirmerTest.cpp
@@ -66,6 +66,11 @@
 #include "lbm/vtk/Velocity.h"
 #include "vtk/VTKOutput.h"
 
+
+namespace squirmer
+{
+
+
 ///////////
 // USING //
 ///////////
@@ -75,45 +80,44 @@ using walberla::uint_t;
 
 // PDF field, flag field & body field
 typedef lbm::force_model::None ForceModel_T;
-typedef lbm::D3Q19< lbm::collision_model::TRT, false, ForceModel_T >  LatticeModel_T;
+typedef lbm::D3Q19<lbm::collision_model::TRT, false, ForceModel_T> LatticeModel_T;
 
-typedef LatticeModel_T::Stencil                         Stencil_T;
-typedef lbm::PdfField< LatticeModel_T >                 PdfField_T;
+typedef LatticeModel_T::Stencil Stencil_T;
+typedef lbm::PdfField<LatticeModel_T> PdfField_T;
 
-typedef walberla::uint8_t                 flag_t;
-typedef FlagField< flag_t >               FlagField_T;
-typedef GhostLayerField< pe::BodyID, 1 >  PeBodyField_T;
+typedef walberla::uint8_t flag_t;
+typedef FlagField<flag_t> FlagField_T;
+typedef GhostLayerField<pe::BodyID, 1> PeBodyField_T;
 
 const uint_t FieldGhostLayers = 1;
 
 // boundary handling
-typedef pe_coupling::SimpleBB       < LatticeModel_T, FlagField_T >  MO_BB_T;
+typedef pe_coupling::SimpleBB<LatticeModel_T, FlagField_T> MO_BB_T;
 
-typedef boost::tuples::tuple< MO_BB_T >               BoundaryConditions_T;
-typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
+typedef boost::tuples::tuple<MO_BB_T> BoundaryConditions_T;
+typedef BoundaryHandling<FlagField_T, Stencil_T, BoundaryConditions_T> BoundaryHandling_T;
 
-typedef boost::tuple<pe::Squirmer> BodyTypeTuple ;
+typedef boost::tuple<pe::Squirmer> BodyTypeTuple;
 
 ///////////
 // FLAGS //
 ///////////
 
-const FlagUID Fluid_Flag   ( "fluid" );
-const FlagUID MO_BB_Flag   ( "moving obstacle BB" );
-const FlagUID FormerMO_BB_Flag   ( "former moving obstacle BB" );
+const FlagUID Fluid_Flag("fluid");
+const FlagUID MO_BB_Flag("moving obstacle BB");
+const FlagUID FormerMO_BB_Flag("former moving obstacle BB");
 
 /////////////////////////////////////
 // BOUNDARY HANDLING CUSTOMIZATION //
 /////////////////////////////////////
 
-class MyBoundaryHandling
-{
+class MyBoundaryHandling {
 public:
 
-   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID ) :
-      flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID ) {}
+   MyBoundaryHandling(const BlockDataID &flagFieldID, const BlockDataID &pdfFieldID, const BlockDataID &bodyFieldID) :
+         flagFieldID_(flagFieldID), pdfFieldID_(pdfFieldID), bodyFieldID_(bodyFieldID) {}
 
-   BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const;
+   BoundaryHandling_T *operator()(IBlock *const block, const StructuredBlockStorage *const storage) const;
 
 private:
 
@@ -127,141 +131,139 @@ private:
 // VTK OUTPUT HELPER FUNCTION //
 ////////////////////////////////
 
-BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const StructuredBlockStorage * const storage ) const
-{
-   WALBERLA_ASSERT_NOT_NULLPTR( block );
-   WALBERLA_ASSERT_NOT_NULLPTR( storage );
+BoundaryHandling_T *
+MyBoundaryHandling::operator()(IBlock *const block, const StructuredBlockStorage *const storage) const {
+   WALBERLA_ASSERT_NOT_NULLPTR(block);
+   WALBERLA_ASSERT_NOT_NULLPTR(storage);
 
-   FlagField_T * flagField       = block->getData< FlagField_T >( flagFieldID_ );
-   PdfField_T *  pdfField        = block->getData< PdfField_T > ( pdfFieldID_ );
-   PeBodyField_T * bodyField     = block->getData< PeBodyField_T >( bodyFieldID_ );
+   FlagField_T *flagField = block->getData<FlagField_T>(flagFieldID_);
+   PdfField_T *pdfField = block->getData<PdfField_T>(pdfFieldID_);
+   PeBodyField_T *bodyField = block->getData<PeBodyField_T>(bodyFieldID_);
 
-   const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
+   const auto fluid = flagField->flagExists(Fluid_Flag) ? flagField->getFlag(Fluid_Flag) : flagField->registerFlag(
+         Fluid_Flag);
 
-   BoundaryHandling_T * handling = new BoundaryHandling_T( "fixed obstacle boundary handling", flagField, fluid,
-                                                           boost::tuples::make_tuple( MO_BB_T (  "MO_BB",  MO_BB_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ) );
+   BoundaryHandling_T *handling = new BoundaryHandling_T("fixed obstacle boundary handling", flagField, fluid,
+                                                         boost::tuples::make_tuple(
+                                                               MO_BB_T("MO_BB", MO_BB_Flag, pdfField, flagField,
+                                                                       bodyField, fluid, *storage, *block)));
 
-   handling->fillWithDomain( FieldGhostLayers );
+   handling->fillWithDomain(FieldGhostLayers);
 
    return handling;
 }
 
-shared_ptr< vtk::VTKOutput> createFluidFieldVTKWriter( shared_ptr< StructuredBlockForest > & blocks,
-                                                       const BlockDataID & pdfFieldId, const BlockDataID & flagFieldId,
-                                                       std::string fname )
-{
-   auto pdfFieldVTKWriter = vtk::createVTKOutput_BlockData( blocks, fname, uint_c(1), uint_c(0) );
+shared_ptr<vtk::VTKOutput> createFluidFieldVTKWriter(shared_ptr<StructuredBlockForest> &blocks,
+                                                     const BlockDataID &pdfFieldId, const BlockDataID &flagFieldId,
+                                                     std::string fname) {
+   auto pdfFieldVTKWriter = vtk::createVTKOutput_BlockData(blocks, fname, uint_c(1), uint_c(0));
 
-   field::FlagFieldCellFilter< FlagField_T > fluidFilter( flagFieldId );
-   fluidFilter.addFlag( Fluid_Flag );
-   pdfFieldVTKWriter->addCellInclusionFilter( fluidFilter );
+   field::FlagFieldCellFilter<FlagField_T> fluidFilter(flagFieldId);
+   fluidFilter.addFlag(Fluid_Flag);
+   pdfFieldVTKWriter->addCellInclusionFilter(fluidFilter);
 
-   auto velocityWriter = make_shared< lbm::VelocityVTKWriter< LatticeModel_T, float > >( pdfFieldId, "VelocityFromPDF" );
-   pdfFieldVTKWriter->addCellDataWriter( velocityWriter );
+   auto velocityWriter = make_shared<lbm::VelocityVTKWriter<LatticeModel_T, float> >(pdfFieldId, "VelocityFromPDF");
+   pdfFieldVTKWriter->addCellDataWriter(velocityWriter);
 
    return pdfFieldVTKWriter;
 }
 
-class ResetPosition
-{
+class ResetPosition {
 public:
-   
-   explicit ResetPosition( const shared_ptr<StructuredBlockStorage> & blockStorage,
-                           const BlockDataID & bodyStorageID,
-                           real_t dt )
-   : blockStorage_( blockStorage )
-   , bodyStorageID_( bodyStorageID )
-   , dt_( dt )
-   {}
-   
-   void operator()()
-   {
-      for (auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt)
-      {
-         for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-         {
-            bodyIt->setPosition( bodyIt->getPosition() - bodyIt->getLinearVel() * dt_ );
+
+   explicit ResetPosition(const shared_ptr<StructuredBlockStorage> &blockStorage,
+                          const BlockDataID &bodyStorageID,
+                          real_t dt)
+         : blockStorage_(blockStorage), bodyStorageID_(bodyStorageID), dt_(dt) {}
+
+   void operator()() {
+      for (auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt) {
+         for (auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_);
+              bodyIt != pe::BodyIterator::end(); ++bodyIt) {
+            bodyIt->setPosition(bodyIt->getPosition() - bodyIt->getLinearVel() * dt_);
          }
       }
    }
-   
+
 private:
-   
+
    shared_ptr<StructuredBlockStorage> blockStorage_;
-   const BlockDataID &  bodyStorageID_;
-   
+   const BlockDataID &bodyStorageID_;
+
    real_t dt_;
-   
+
 }; // class ResetPosition
 
 
-Vector3<real_t> analytical_velocity(Vector3<real_t> r, real_t b1, real_t b2, Vector3<real_t> e, real_t radius)
-{
+Vector3<real_t> analytical_velocity(Vector3<real_t> r, real_t b1, real_t b2, Vector3<real_t> e, real_t radius) {
    auto r_len = r.length();
    auto rs = r.getNormalized();
-   auto frac = radius/r_len;
-   auto frac2 = frac*frac;
-   auto frac3 = frac2*frac;
-   auto frac4 = frac3*frac;
+   auto frac = radius / r_len;
+   auto frac2 = frac * frac;
+   auto frac3 = frac2 * frac;
+   auto frac4 = frac3 * frac;
 
-   return b1 * frac3 * ((e*rs) * rs - e / real_c(3.0)) + ( frac4 - frac2 ) * b2 * 0.5 * ( 3 * (e * rs) * (e * rs) - 1 ) * rs + frac4 * b2 * (e*rs) * ( (e*rs) * rs - e);
+   return b1 * frac3 * ((e * rs) * rs - e / real_c(3.0)) +
+          (frac4 - frac2) * b2 * 0.5 * (3 * (e * rs) * (e * rs) - 1) * rs + frac4 * b2 * (e * rs) * ((e * rs) * rs - e);
 }
 
 
-int main( int argc, char **argv )
-{
+int main(int argc, char **argv) {
    debug::enterTestMode();
 
-   mpi::Environment env( argc, argv );
+   mpi::Environment env(argc, argv);
 
    auto processes = MPIManager::instance()->numProcesses();
-   
-   if( processes != 1 && processes != 2 && processes != 4 && processes != 8)
-   {
+
+   if (processes != 1 && processes != 2 && processes != 4 && processes != 8) {
       std::cerr << "Number of processes must be equal to either 1, 2, 4, or 8!" << std::endl;
       return EXIT_FAILURE;
    }
-   
+
    bool shortrun = false;
    bool writevtk = false;
-   for( int i = 1; i < argc; ++i )
-   {
-      if( std::strcmp( argv[i], "--shortrun" ) == 0 ) { shortrun       = true; continue; }
-      if( std::strcmp( argv[i], "--vtk" )      == 0 ) { writevtk       = true; continue; }
+   for (int i = 1; i < argc; ++i) {
+      if (std::strcmp(argv[i], "--shortrun") == 0) {
+         shortrun = true;
+         continue;
+      }
+      if (std::strcmp(argv[i], "--vtk") == 0) {
+         writevtk = true;
+         continue;
+      }
    }
-   
+
    ///////////////////////////
    // SIMULATION PROPERTIES //
    ///////////////////////////
-   
+
    real_t R = 3;
    uint_t L = 24;
    uint_t T = 1000;
-   if (shortrun)
-   {
-       L = 10;
-       T = 10;
+   if (shortrun) {
+      L = 10;
+      T = 10;
    }
    const real_t dx = real_c(1.0);
    const real_t dt = real_c(1.0);
-   const real_t omega = lbm::collision_model::omegaFromViscosity( real_c(1.0/6.0) );
+   const real_t omega = lbm::collision_model::omegaFromViscosity(real_c(1.0 / 6.0));
    const real_t v0 = real_c(0.01);
    const real_t beta = real_c(5.0);
-   
+
    ///////////////////////////
    // BLOCK STRUCTURE SETUP //
    ///////////////////////////
-   const uint_t XBlocks = (processes >= 2) ? uint_t( 2 ) : uint_t( 1 );
-   const uint_t YBlocks = (processes >= 4) ? uint_t( 2 ) : uint_t( 1 );
-   const uint_t ZBlocks = (processes == 8) ? uint_t( 2 ) : uint_t( 1 );
+   const uint_t XBlocks = (processes >= 2) ? uint_t(2) : uint_t(1);
+   const uint_t YBlocks = (processes >= 4) ? uint_t(2) : uint_t(1);
+   const uint_t ZBlocks = (processes == 8) ? uint_t(2) : uint_t(1);
    const uint_t XCells = L / XBlocks;
    const uint_t YCells = L / YBlocks;
    const uint_t ZCells = L / ZBlocks;
 
    // create fully periodic domain
-   auto blocks = blockforest::createUniformBlockGrid( XBlocks, YBlocks, ZBlocks, XCells, YCells, ZCells, dx, true,
-                                                      true, true, true );
-   
+   auto blocks = blockforest::createUniformBlockGrid(XBlocks, YBlocks, ZBlocks, XCells, YCells, ZCells, dx, true,
+                                                     true, true, true);
+
    ////////
    // PE //
    ////////
@@ -269,35 +271,39 @@ int main( int argc, char **argv )
    auto globalBodyStorage = make_shared<pe::BodyStorage>();
    pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
    auto bodyStorageID = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage");
-   auto ccdID         = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD");
-   auto fcdID         = blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD");
-   
+   auto ccdID = blocks->addBlockData(pe::ccd::createHashGridsDataHandling(globalBodyStorage, bodyStorageID), "CCD");
+   auto fcdID = blocks->addBlockData(
+         pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD");
+
    pe::cr::HCSITS cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID);
-   
+
    /////////////////
    // PE COUPLING //
    /////////////////
 
    // connect to pe
-   const real_t overlap = real_c( 1.5 ) * dx;
+   const real_t overlap = real_c(1.5) * dx;
 
-   if( R > real_c( L ) * real_c(0.5) - overlap )
-   {
+   if (R > real_c(L) * real_c(0.5) - overlap) {
       std::cerr << "Periodic sphere is too large and would lead to invalid PE state!" << std::endl;
       return EXIT_FAILURE;
    }
-   
-   boost::function<void(void)> syncCall = boost::bind( pe::syncShadowOwners<BodyTypeTuple>, boost::ref(blocks->getBlockForest()), bodyStorageID, static_cast<WcTimingTree*>(NULL), overlap, false );
-   
-   const auto myMat = pe::createMaterial( "myMat", real_c(1), real_t(0), real_t(1), real_t(1), real_t(0), real_t(1), real_t(1), real_t(0), real_t(0) );
+
+   boost::function<void(void)> syncCall = boost::bind(pe::syncShadowOwners<BodyTypeTuple>,
+                                                      boost::ref(blocks->getBlockForest()), bodyStorageID,
+                                                      static_cast<WcTimingTree *>(NULL), overlap, false);
+
+   const auto myMat = pe::createMaterial("myMat", real_c(1), real_t(0), real_t(1), real_t(1), real_t(0), real_t(1),
+                                         real_t(1), real_t(0), real_t(0));
 
    // create the squirmer in the middle of the domain
-   const Vector3<real_t> position (real_c(L)*real_c(0.5), real_c(L)*real_c(0.5), real_c(L)*real_c(0.5));
+   const Vector3<real_t> position(real_c(L) * real_c(0.5), real_c(L) * real_c(0.5), real_c(L) * real_c(0.5));
    const Vector3<real_t> up(0.0, 0.0, 1.0);
    const Vector3<real_t> orientation(1.0, 0.0, 0.0);
-   const auto w = std::acos(up*orientation);
+   const auto w = std::acos(up * orientation);
    math::Quaternion<real_t> q(up % orientation, w);
-   auto squirmer = pe::createSquirmer(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, position, R, v0, beta, myMat);
+   auto squirmer = pe::createSquirmer(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, position, R, v0,
+                                      beta, myMat);
    if (squirmer)
       squirmer->setOrientation(q);
 
@@ -308,88 +314,96 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // create the lattice model
-   LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega ) );
+   LatticeModel_T latticeModel = LatticeModel_T(lbm::collision_model::TRT::constructWithMagicNumber(omega));
 
    // add PDF field ( uInit = <0,0,0>, rhoInit = 1 )
-   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel, FieldGhostLayers );
+   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage<LatticeModel_T>(blocks, "pdf field (zyxf)", latticeModel,
+                                                                      FieldGhostLayers);
 
    // add flag field
-   BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" );
+   BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>(blocks, "flag field");
 
    // add body field
-   BlockDataID bodyFieldID = field::addToStorage< PeBodyField_T >( blocks, "body field", NULL, field::zyxf );
+   BlockDataID bodyFieldID = field::addToStorage<PeBodyField_T>(blocks, "body field", NULL, field::zyxf);
 
    // add boundary handling & initialize outer domain boundaries
-   BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >(
-                                    MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID ), "boundary handling" );
+   BlockDataID boundaryHandlingID = blocks->addStructuredBlockData<BoundaryHandling_T>(
+         MyBoundaryHandling(flagFieldID, pdfFieldID, bodyFieldID), "boundary handling");
 
    ///////////////
    // TIME LOOP //
    ///////////////
 
    // create the timeloop
-   SweepTimeloop timeloop( blocks->getBlockStorage(), T );
-   
+   SweepTimeloop timeloop(blocks->getBlockStorage(), T);
+
    // sweep for updating the pe body mapping into the LBM simulation
    timeloop.add()
-   << Sweep( pe_coupling::BodyMapping< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_BB_Flag, FormerMO_BB_Flag ),
-            "Body Mapping" );
-   
+         << Sweep(pe_coupling::BodyMapping<BoundaryHandling_T>(blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,
+                                                               MO_BB_Flag, FormerMO_BB_Flag),
+                  "Body Mapping");
+
    // sweep for restoring PDFs in cells previously occupied by pe bodies
-   typedef pe_coupling::EquilibriumReconstructor< LatticeModel_T, BoundaryHandling_T > Reconstructor_T;
-   Reconstructor_T reconstructor( blocks, boundaryHandlingID, pdfFieldID, bodyFieldID );
+   typedef pe_coupling::EquilibriumReconstructor<LatticeModel_T, BoundaryHandling_T> Reconstructor_T;
+   Reconstructor_T reconstructor(blocks, boundaryHandlingID, pdfFieldID, bodyFieldID);
    timeloop.add()
-   << Sweep( pe_coupling::PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructor_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,
-                                                                                                   reconstructor, FormerMO_BB_Flag, Fluid_Flag  ), "PDF Restore" );
+         << Sweep(pe_coupling::PDFReconstruction<LatticeModel_T, BoundaryHandling_T, Reconstructor_T>(blocks,
+                                                                                                      boundaryHandlingID,
+                                                                                                      bodyStorageID,
+                                                                                                      bodyFieldID,
+                                                                                                      reconstructor,
+                                                                                                      FormerMO_BB_Flag,
+                                                                                                      Fluid_Flag),
+                  "PDF Restore");
 
    // setup of the LBM communication for synchronizing the pdf field between neighboring blocks
-   boost::function< void () > commFunction;
+   boost::function<void()> commFunction;
 
-   blockforest::communication::UniformBufferedScheme< stencil::D3Q27 > scheme( blocks );
-   scheme.addPackInfo( make_shared< field::communication::PackInfo< PdfField_T > >( pdfFieldID ) );
+   blockforest::communication::UniformBufferedScheme<stencil::D3Q27> scheme(blocks);
+   scheme.addPackInfo(make_shared<field::communication::PackInfo<PdfField_T> >(pdfFieldID));
    commFunction = scheme;
 
    // uses standard bounce back boundary conditions
-   pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_BB_Flag );
-   
-   auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag );
-   
+   pe_coupling::mapMovingBodies<BoundaryHandling_T>(*blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,
+                                                    MO_BB_Flag);
+
+   auto sweep = lbm::makeCellwiseSweep<LatticeModel_T, FlagField_T>(pdfFieldID, flagFieldID, Fluid_Flag);
+
    // collision sweep
-   timeloop.add() << Sweep( lbm::makeCollideSweep( sweep ), "cell-wise LB sweep (collide)" );
+   timeloop.add() << Sweep(lbm::makeCollideSweep(sweep), "cell-wise LB sweep (collide)");
 
    // add LBM communication function and boundary handling sweep (does the hydro force calculations and the no-slip treatment)
-   timeloop.add() << BeforeFunction( commFunction, "LBM Communication" )
-                  << Sweep( BoundaryHandling_T::getBlockSweep( boundaryHandlingID ), "Boundary Handling" );
+   timeloop.add() << BeforeFunction(commFunction, "LBM Communication")
+                  << Sweep(BoundaryHandling_T::getBlockSweep(boundaryHandlingID), "Boundary Handling");
 
    // streaming
-   timeloop.add() << Sweep( lbm::makeStreamSweep( sweep ), "cell-wise LB sweep (stream)" );
-   
+   timeloop.add() << Sweep(lbm::makeStreamSweep(sweep), "cell-wise LB sweep (stream)");
+
    // add pe timesteps
-   timeloop.addFuncAfterTimeStep( pe_coupling::TimeStep( blocks, bodyStorageID, cr, syncCall, dt ), "pe Time Step" );
-   timeloop.addFuncAfterTimeStep( ResetPosition( blocks, bodyStorageID, dt ), "Reset pe positions" );
+   timeloop.addFuncAfterTimeStep(pe_coupling::TimeStep(blocks, bodyStorageID, cr, syncCall, dt), "pe Time Step");
+   timeloop.addFuncAfterTimeStep(ResetPosition(blocks, bodyStorageID, dt), "Reset pe positions");
 
-   timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" );
+   timeloop.addFuncAfterTimeStep(RemainingTimeLogger(timeloop.getNrOfTimeSteps()), "Remaining Time Logger");
 
    ////////////////////////
    // EXECUTE SIMULATION //
    ////////////////////////
-   
+
    WALBERLA_LOG_INFO_ON_ROOT("Starting test...");
    WcTimingPool timeloopTiming;
-   timeloop.run( timeloopTiming );
+   timeloop.run(timeloopTiming);
    timeloopTiming.logResultOnRoot();
 
-   if (writevtk)
-   {
+   if (writevtk) {
       WALBERLA_LOG_INFO_ON_ROOT("Writing out VTK file...");
-      auto pdfFieldVTKWriter = createFluidFieldVTKWriter( blocks, pdfFieldID, flagFieldID, "fluid_field" );
-      auto wf = vtk::writeFiles( pdfFieldVTKWriter );
+      auto pdfFieldVTKWriter = createFluidFieldVTKWriter(blocks, pdfFieldID, flagFieldID, "fluid_field");
+      auto wf = vtk::writeFiles(pdfFieldVTKWriter);
       wf();
-      auto flagFieldVTK = vtk::createVTKOutput_BlockData( blocks, "flag_field", 1, 0 );
-      flagFieldVTK->addCellDataWriter( make_shared< field::VTKWriter< FlagField_T > >( flagFieldID, "FlagField" ) );
-      vtk::writeFiles( flagFieldVTK )();
+      auto flagFieldVTK = vtk::createVTKOutput_BlockData(blocks, "flag_field", 1, 0);
+      flagFieldVTK->addCellDataWriter(make_shared<field::VTKWriter<FlagField_T> >(flagFieldID, "FlagField"));
+      vtk::writeFiles(flagFieldVTK)();
    }
-   
+
    if (squirmer) // this object only exists on the node that contains position
    {
       WALBERLA_ASSERT_FLOAT_EQUAL(squirmer->getSquirmerVelocity(), v0);
@@ -398,7 +412,7 @@ int main( int argc, char **argv )
       WALBERLA_ASSERT_FLOAT_EQUAL(squirmer->getPosition(), position);
       WALBERLA_ASSERT_FLOAT_EQUAL(squirmer->getQuaternion(), q);
    }
-   
+
    WALBERLA_LOG_INFO_ON_ROOT("Checking result...");
    auto b1 = real_c(1.5) * v0;
    auto b2 = beta * b1;
@@ -409,66 +423,71 @@ int main( int argc, char **argv )
    real_t abs_tolerance = real_c(0.0026);
    real_t rel_tolerance = real_c(0.10);
 
-   for ( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
-   {
-      IBlock & block = *blockIt;
-      auto src = block.getData< PdfField_T >(pdfFieldID);
+   for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
+      IBlock &block = *blockIt;
+      auto src = block.getData<PdfField_T>(pdfFieldID);
 
-      auto flagField = block.getData<FlagField_T> (flagFieldID);
+      auto flagField = block.getData<FlagField_T>(flagFieldID);
       auto flag = flagField->getFlag(Fluid_Flag);
 
       WALBERLA_FOR_ALL_CELLS_XYZ(src, {
          Vector3<real_t> pos;
-         blocks->getBlockLocalCellCenter( block, Cell(x,y,z), pos[0], pos[1], pos[2] );
+         blocks->getBlockLocalCellCenter(block, Cell(x, y, z), pos[0], pos[1], pos[2]);
 
-         if( flagField->isFlagSet(x, y, z, flag) ) //check for fluid/non-boundary
+         if (flagField->isFlagSet(x, y, z, flag)) //check for fluid/non-boundary
          {
             auto r = pos - squirmer_pos;
-            
+
             auto v_ana = analytical_velocity(r, b1, b2, e, radius);
-            
+
             // Superposition with the nearest neighbours (faces)
-            for( auto s = stencil::D3Q7::beginNoCenter(); s != stencil::D3Q7::end(); ++s )
-            {
+            for (auto s = stencil::D3Q7::beginNoCenter(); s != stencil::D3Q7::end(); ++s) {
                Vector3<real_t> imidpoint(squirmer_pos);
                imidpoint[0] += real_c(s.cx() * int_c(L));
                imidpoint[1] += real_c(s.cy() * int_c(L));
                imidpoint[2] += real_c(s.cz() * int_c(L));
-               v_ana += analytical_velocity( pos - imidpoint, b1, b2, e, radius );
+               v_ana += analytical_velocity(pos - imidpoint, b1, b2, e, radius);
             }
-            
-            if( !shortrun && r.length() > 2*radius ) // Exclude the volume around the squirmer, as the analytical solution is a far field solution only
+
+            if (!shortrun && r.length() > 2 *
+                                          radius) // Exclude the volume around the squirmer, as the analytical solution is a far field solution only
             {
                auto v_data = src->getVelocity(x, y, z);
                auto diff = v_data - v_ana;
-               
-               for( uint_t d = 0; d < 3; ++d )
-               {
-                  if( std::abs( diff[d] / v_ana[d] ) > rel_tolerance && std::abs( diff[d] ) > abs_tolerance )
-                  {
-                     WALBERLA_LOG_DEVEL("Difference too large in " << Cell(x,y,z) << " (" << r.length() << "). Expected " << v_ana << ", got " << v_data << ", relative " << std::abs(diff[d] / v_ana[d]) << ", absolute " << std::abs(diff[d]));
+
+               for (uint_t d = 0; d < 3; ++d) {
+                  if (std::abs(diff[d] / v_ana[d]) > rel_tolerance && std::abs(diff[d]) > abs_tolerance) {
+                     WALBERLA_LOG_DEVEL(
+                           "Difference too large in " << Cell(x, y, z) << " (" << r.length() << "). Expected " << v_ana
+                                                      << ", got " << v_data << ", relative "
+                                                      << std::abs(diff[d] / v_ana[d]) << ", absolute "
+                                                      << std::abs(diff[d]));
                   }
                }
             }
-            
-            if( writevtk )
-            {
+
+            if (writevtk) {
                // store the value into the PDF field so we can write it to VTK later
-               src->setToEquilibrium( x, y, z, v_ana );
+               src->setToEquilibrium(x, y, z, v_ana);
             }
          }
       });
    }
-   
-   if (writevtk)
-   {
+
+   if (writevtk) {
       WALBERLA_LOG_INFO_ON_ROOT("Writing out reference VTK file...");
-      auto pdfFieldVTKWriter = createFluidFieldVTKWriter( blocks, pdfFieldID, flagFieldID, "fluid_field_ref" );
-      auto wf = vtk::writeFiles( pdfFieldVTKWriter );
+      auto pdfFieldVTKWriter = createFluidFieldVTKWriter(blocks, pdfFieldID, flagFieldID, "fluid_field_ref");
+      auto wf = vtk::writeFiles(pdfFieldVTKWriter);
       wf();
    }
-   
+
    WALBERLA_LOG_INFO_ON_ROOT("Completed test.");
-   
+
    return 0;
 }
+
+} // namespace squirmer
+
+int main( int argc, char **argv ){
+   squirmer::main(argc, argv);
+}
\ No newline at end of file
diff --git a/tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp b/tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp
index b7d4fc43c277766caaacf7f03a129577dbc79bd1..f4c2df52544f1d6e80ca2571749a6baea4dbc0bd 100644
--- a/tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp
@@ -339,7 +339,7 @@ int main( int argc, char **argv )
 
    // map pe bodies into the LBM simulation
    // moving bodies are handled by the momentum exchange method, thus act here as velocity boundary condition
-   pe_coupling::mapMovingGlobalBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, *globalBodyStorage, bodyFieldID, MO_Flag );
+   pe_coupling::mapMovingGlobalBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, *globalBodyStorage, bodyFieldID, MO_Flag );
 
    ///////////////
    // TIME LOOP //
diff --git a/tests/pe_coupling/momentum_exchange_method/TorqueSphereMEM.cpp b/tests/pe_coupling/momentum_exchange_method/TorqueSphereMEM.cpp
index 4e41079db7b9a0b8e18993d776340a36de2cd535..96bb5a31ea5e74250760dad2c6571bde68241cc5 100644
--- a/tests/pe_coupling/momentum_exchange_method/TorqueSphereMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/TorqueSphereMEM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file TorqueSphereMEMPe.cpp
+//! \file TorqueSphereMEM.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -64,7 +64,7 @@
 #include <iomanip>
 #include <iostream>
 
-namespace torque_sphere_mem_pe
+namespace torque_sphere_mem
 {
 
 
@@ -476,13 +476,13 @@ int main( int argc, char **argv )
    if( MO_CLI )
    {
       // uses a higher order boundary condition (CLI)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_CLI_Flag );
    }else if ( MO_MR ){
       // uses a higher order boundary condition (MR)
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_MR_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_MR_Flag );
    }else{
       // uses standard bounce back boundary conditions
-      pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
+      pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, bodyFieldID,  MO_BB_Flag );
    }
 
    shared_ptr< TorqueEval > torqueEval = make_shared< TorqueEval >( &timeloop, &setup, blocks, bodyStorageID, fileIO );
@@ -563,8 +563,8 @@ int main( int argc, char **argv )
 
 }
 
-} //namespace torque_sphere_mem_pe
+} //namespace torque_sphere_mem
 
 int main( int argc, char **argv ){
-   torque_sphere_mem_pe::main(argc, argv);
+   torque_sphere_mem::main(argc, argv);
 }
diff --git a/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSM.cpp b/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSM.cpp
index 8444242feb0e6b0ede9a075edb8667b5f6f3b05f..79e3c2f4901866bc7a1dd3e9304fc0629981da8a 100644
--- a/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSM.cpp
+++ b/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file DragForceSpherePSMPe.cpp
+//! \file DragForceSpherePSM.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -58,12 +58,13 @@
 #include "pe/basic.h"
 
 #include "pe_coupling/partially_saturated_cells_method/all.h"
+#include "pe_coupling/utility/all.h"
 
 #include <vector>
 #include <iomanip>
 #include <iostream>
 
-namespace drag_force_sphere_psm_pe
+namespace drag_force_sphere_psm
 {
 
 ///////////
@@ -252,12 +253,12 @@ private:
          {
             for( auto fieldIt = xyzFieldSize.begin(); fieldIt != xyzFieldSize.end(); ++fieldIt )
             {
-               velocity_sum += pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 1>( *blockIt, pdfField, bodyAndVolumeFractionField, blocks_, *fieldIt )[0];
+               velocity_sum += pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 1>( *blockIt, pdfField, bodyAndVolumeFractionField, *blocks_, *fieldIt )[0];
             }
          }else{
             for( auto fieldIt = xyzFieldSize.begin(); fieldIt != xyzFieldSize.end(); ++fieldIt )
             {
-               velocity_sum += pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 2>( *blockIt, pdfField, bodyAndVolumeFractionField, blocks_, *fieldIt )[0];
+               velocity_sum += pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 2>( *blockIt, pdfField, bodyAndVolumeFractionField, *blocks_, *fieldIt )[0];
             }
          }
       }
@@ -287,30 +288,6 @@ private:
 };
 
 
-class ResetForce
-{
-public:
-   ResetForce( const shared_ptr< StructuredBlockStorage > & blocks,
-               const BlockDataID & bodyStorageID )
-   : blocks_( blocks ), bodyStorageID_( bodyStorageID )
-   { }
-
-   void operator()()
-   {
-      for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
-      {
-         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-         {
-            bodyIt->resetForceAndTorque();
-         }
-      }
-   }
-private:
-   shared_ptr< StructuredBlockStorage > blocks_;
-   const BlockDataID bodyStorageID_;
-};
-
-
 //////////
 // MAIN //
 //////////
@@ -463,11 +440,11 @@ int main( int argc, char **argv )
    // initialize the PDF field for PSM
    if( method == PSMVariant::SC1W1 || method == PSMVariant::SC2W1 || method == PSMVariant::SC3W1 )
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
    else
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
 
    ///////////////
@@ -553,7 +530,7 @@ int main( int argc, char **argv )
    timeloop.addFuncAfterTimeStep( SharedFunctor< DragForceEvaluator >(forceEval), "drag force evaluation" );
 
    // resetting force
-   timeloop.addFuncAfterTimeStep( ResetForce( blocks, bodyStorageID ), "reset force on sphere");
+   timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesResetter( blocks, bodyStorageID ), "reset force on sphere");
 
    timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" );
 
@@ -600,8 +577,8 @@ int main( int argc, char **argv )
    return 0;
 }
 
-} //namespace drag_force_sphere_psm_pe
+} //namespace drag_force_sphere_psm
 
 int main( int argc, char **argv ){
-   drag_force_sphere_psm_pe::main(argc, argv);
+   drag_force_sphere_psm::main(argc, argv);
 }
diff --git a/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSMRefinement.cpp b/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSMRefinement.cpp
index 329c56767bad255061d286f36f413ce08017f622..f29f7d650c9e0bbd02ada920f48b4dd2cde1efee 100644
--- a/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSMRefinement.cpp
+++ b/tests/pe_coupling/partially_saturated_cells_method/DragForceSpherePSMRefinement.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file DragForceSpherePSMRefinementPe.cpp
+//! \file DragForceSpherePSMRefinement.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -61,6 +61,7 @@
 #include "pe/vtk/SphereVtkOutput.h"
 
 #include "pe_coupling/partially_saturated_cells_method/all.h"
+#include "pe_coupling/utility/all.h"
 
 #include "vtk/all.h"
 #include "field/vtk/all.h"
@@ -70,7 +71,7 @@
 #include <iomanip>
 #include <iostream>
 
-namespace drag_force_sphere_psm_pe_refinement
+namespace drag_force_sphere_psm_refinement
 {
 
 ///////////
@@ -371,12 +372,12 @@ private:
          {
             for( auto fieldIt = xyzFieldSize.begin(); fieldIt != xyzFieldSize.end(); ++fieldIt )
             {
-               velocity_sum += cellVolume * pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 1>( *blockIt, pdfField, bodyAndVolumeFractionField, blocks_, *fieldIt )[0];
+               velocity_sum += cellVolume * pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 1>( *blockIt, pdfField, bodyAndVolumeFractionField, *blocks_, *fieldIt )[0];
             }
          }else{
             for( auto fieldIt = xyzFieldSize.begin(); fieldIt != xyzFieldSize.end(); ++fieldIt )
             {
-               velocity_sum += cellVolume * pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 2>( *blockIt, pdfField, bodyAndVolumeFractionField, blocks_, *fieldIt )[0];
+               velocity_sum += cellVolume * pe_coupling::getPSMMacroscopicVelocity<LatticeModel_T, 2>( *blockIt, pdfField, bodyAndVolumeFractionField, *blocks_, *fieldIt )[0];
             }
          }
       }
@@ -408,30 +409,6 @@ private:
 };
 
 
-class ResetForce
-{
-   public:
-      ResetForce( const shared_ptr< StructuredBlockStorage > & blocks,
-                  const BlockDataID & bodyStorageID )
-      : blocks_( blocks ), bodyStorageID_( bodyStorageID )
-      { }
-
-      void operator()()
-      {
-         for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
-         {
-            for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-            {
-                bodyIt->resetForceAndTorque();
-            }
-         }
-      }
-private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      const BlockDataID bodyStorageID_;
-};
-
-
 //////////
 // MAIN //
 //////////
@@ -581,11 +558,11 @@ int main( int argc, char **argv )
    // initialize the PDF field for PSM
    if( method == PSMVariant::SC1W1 || method == PSMVariant::SC2W1 || method == PSMVariant::SC3W1 )
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
    else
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
    ///////////////
    // TIME LOOP //
@@ -632,7 +609,7 @@ int main( int argc, char **argv )
    timeloop.addFuncAfterTimeStep( SharedFunctor< DragForceEvaluator >(forceEval), "drag force evaluation" );
 
    // resetting force
-   timeloop.addFuncAfterTimeStep( ResetForce( blocks, bodyStorageID ), "reset force on sphere");
+   timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesResetter( blocks, bodyStorageID ), "reset force on sphere");
 
    if( vtkIO )
    {
@@ -702,8 +679,8 @@ int main( int argc, char **argv )
 
 }
 
-} //namespace drag_force_sphere_psm_pe_refinement
+} //namespace drag_force_sphere_psm_refinement
 
 int main( int argc, char **argv ){
-   drag_force_sphere_psm_pe_refinement::main(argc, argv);
+   drag_force_sphere_psm_refinement::main(argc, argv);
 }
diff --git a/tests/pe_coupling/partially_saturated_cells_method/SegreSilberbergPSM.cpp b/tests/pe_coupling/partially_saturated_cells_method/SegreSilberbergPSM.cpp
index ef3378bcace821843d098592ee5781e0739fb141..f05d9b43db7d971f4364c1daf7fe8faf2d53f1e4 100644
--- a/tests/pe_coupling/partially_saturated_cells_method/SegreSilberbergPSM.cpp
+++ b/tests/pe_coupling/partially_saturated_cells_method/SegreSilberbergPSM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file SegreSilberbergPSMPe.cpp
+//! \file SegreSilberbergPSM.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -64,7 +64,7 @@
 #include "field/vtk/all.h"
 #include "lbm/vtk/all.h"
 
-namespace segre_silberberg_psm_pe
+namespace segre_silberberg_psm
 {
 
 ///////////
@@ -353,71 +353,6 @@ class SteadyStateCheck
       real_t posNew_;
 };
 
-// When using N LBM steps and one (larger) pe step in a single simulation step, the force applied on the pe bodies in each LBM sep has to be scaled
-// by a factor of 1/N before running the pe simulation. This corresponds to an averaging of the force and torque over the N LBM steps and is said to
-// damp oscillations. Usually, N = 2.
-// See Ladd - " Numerical simulations of particulate suspensions via a discretized Boltzmann equation. Part 1. Theoretical foundation", 1994, p. 302
-class AverageForce
-{
-   public:
-      AverageForce( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & bodyStorageID,
-                    const uint_t lbmSubCycles )
-      : blocks_( blocks ), bodyStorageID_( bodyStorageID ), invLbmSubCycles_( real_c(1) / real_c( lbmSubCycles ) ) { }
-
-      void operator()()
-      {
-         pe::Vec3 force  = pe::Vec3(0.0);
-         pe::Vec3 torque = pe::Vec3(0.0);
-
-         for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
-         {
-            for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-            {
-               force  = invLbmSubCycles_ * bodyIt->getForce();
-               torque = invLbmSubCycles_ * bodyIt->getTorque();
-
-               bodyIt->resetForceAndTorque();
-
-               bodyIt->setForce ( force );
-               bodyIt->setTorque( torque );
-            }
-         }
-      }
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      const BlockDataID bodyStorageID_;
-      real_t invLbmSubCycles_;
-};
-
-
-// The flow in the channel is here driven by a body force and is periodic
-// Thus, the pressure gradient, usually encountered in a channel flow, is not present here
-// The buoyancy force on the body due to this pressure gradient has to be added 'artificially'
-// F_{buoyancy} = - V_{body} * grad ( p ) = V_{body} * \rho_{fluid} *  a
-// ( V_{body} := volume of body,  a := acceleration driving the flow )
-class BuoyancyForce
-{
-   public:
-   BuoyancyForce( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & bodyStorageID,
-                  const Vector3<real_t> pressureForce )
-      : blocks_( blocks ), bodyStorageID_( bodyStorageID ), pressureForce_( pressureForce ) { }
-
-      void operator()()
-      {
-
-         for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
-         {
-            for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
-            {
-               bodyIt->addForce ( pressureForce_ );
-            }
-         }
-      }
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      const BlockDataID bodyStorageID_;
-      const Vector3<real_t> pressureForce_;
-};
 
 //////////
 // MAIN //
@@ -621,11 +556,11 @@ int main( int argc, char **argv )
    // initialize the PDF field for PSM
    if( SC1W1 || SC2W1 || SC3W1 )
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
    else
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
 
    ///////////////
@@ -679,13 +614,17 @@ int main( int argc, char **argv )
    }
 
    // average the forces acting on the bodies over the number of LBM steps
-   timeloop.addFuncAfterTimeStep( AverageForce( blocks, bodyStorageID, numLbmSubCycles ), "Force averaging for several LBM steps" );
+   timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesScaler( blocks, bodyStorageID, real_t(1)/real_c(numLbmSubCycles) ), "Force averaging for several LBM steps" );
 
    // add pressure force contribution
-   timeloop.addFuncAfterTimeStep( BuoyancyForce( blocks, bodyStorageID,
-                                                 Vector3<real_t>( math::M_PI / real_c(6) * setup.forcing * setup.particleDiam * setup.particleDiam * setup.particleDiam ,
-                                                                  real_c(0), real_c(0) ) ),
-                                  "Buoyancy force" );
+   // The flow in the channel is here driven by a body force and is periodic
+   // Thus, the pressure gradient, usually encountered in a channel flow, is not present here
+   // The buoyancy force on the body due to this pressure gradient has to be added 'artificially'
+   // F_{buoyancy} = - V_{body} * grad ( p ) = V_{body} * \rho_{fluid} *  a
+   // ( V_{body} := volume of body,  a := acceleration driving the flow )
+   Vector3<real_t> buoyancyForce(math::M_PI / real_t(6) * setup.forcing * setup.particleDiam * setup.particleDiam * setup.particleDiam ,
+                                 real_t(0), real_t(0));
+   timeloop.addFuncAfterTimeStep( pe_coupling::ForceOnBodiesAdder( blocks, bodyStorageID, buoyancyForce ), "Buoyancy force" );
 
    // add pe timesteps
    timeloop.addFuncAfterTimeStep( pe_coupling::TimeStep( blocks, bodyStorageID, cr, syncCall, dt_pe, pe_interval ), "pe Time Step" );
@@ -775,8 +714,8 @@ int main( int argc, char **argv )
    return EXIT_SUCCESS;
 }
 
-} // namespace segre_silberberg_psm_pe
+} // namespace segre_silberberg_psm
 
 int main( int argc, char **argv ){
-   segre_silberberg_psm_pe::main(argc, argv);
+   segre_silberberg_psm::main(argc, argv);
 }
diff --git a/tests/pe_coupling/partially_saturated_cells_method/TorqueSpherePSM.cpp b/tests/pe_coupling/partially_saturated_cells_method/TorqueSpherePSM.cpp
index 75fd18cba24ac1ea16d0d43c21d38cb67e8304ab..31921f2f02306b36cd19480d3b3d043504072583 100644
--- a/tests/pe_coupling/partially_saturated_cells_method/TorqueSpherePSM.cpp
+++ b/tests/pe_coupling/partially_saturated_cells_method/TorqueSpherePSM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file TorqueSpherePSMPe.cpp
+//! \file TorqueSpherePSM.cpp
 //! \ingroup pe_coupling
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
 //
@@ -57,12 +57,13 @@
 #include "pe/basic.h"
 
 #include "pe_coupling/partially_saturated_cells_method/all.h"
+#include "pe_coupling/utility/all.h"
 
 #include <vector>
 #include <iomanip>
 #include <iostream>
 
-namespace torque_sphere_psm_pe
+namespace torque_sphere_psm
 {
 
 
@@ -219,36 +220,10 @@ class TorqueEval
 };
 
 
-class ResetForce
-{
-   public:
-      ResetForce( const shared_ptr< StructuredBlockStorage > & blocks,
-                  const BlockDataID & bodyStorageID )
-      : blocks_( blocks ), bodyStorageID_( bodyStorageID )
-      { }
-
-      void operator()()
-      {
-         for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
-         {
-            for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-            {
-                bodyIt->resetForceAndTorque();
-            }
-         }
-      }
-
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      const BlockDataID bodyStorageID_;
-};
-
-
 //////////
 // MAIN //
 //////////
 
-
 //*******************************************************************************************************************
 /*!\brief Testcase that checks the torque acting on a constantly rotating sphere in the center of a cubic domain
  *
@@ -273,7 +248,6 @@ class ResetForce
  */
 //*******************************************************************************************************************
 
-
 int main( int argc, char **argv )
 {
    debug::enterTestMode();
@@ -413,11 +387,11 @@ int main( int argc, char **argv )
    // initialize the PDF field for PSM
    if( SC1W1 || SC2W1 || SC3W1 )
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 1 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
    else
    {
-      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
+      pe_coupling::initializeDomainForPSM< LatticeModel_T, 2 >( *blocks, pdfFieldID, bodyAndVolumeFractionFieldID );
    }
 
    ///////////////
@@ -481,7 +455,7 @@ int main( int argc, char **argv )
    }
 
    // resetting force
-   timeloop.addFuncAfterTimeStep( ResetForce( blocks, bodyStorageID ), "reset force on sphere");
+   timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesResetter( blocks, bodyStorageID ), "reset force on sphere");
 
    timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" );
 
@@ -528,8 +502,8 @@ int main( int argc, char **argv )
 
 }
 
-} //namespace torque_sphere_psm_pe
+} //namespace torque_sphere_psm
 
 int main( int argc, char **argv ){
-   torque_sphere_psm_pe::main(argc, argv);
+   torque_sphere_psm::main(argc, argv);
 }