diff --git a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3bb791552152d802519ea2f321309a14c696f147
--- /dev/null
+++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp
@@ -0,0 +1,2244 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file AMRSedimentSettling.cpp
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "blockforest/loadbalancing/all.h"
+#include "blockforest/AABBRefinementSelection.h"
+
+#include "boundary/all.h"
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/SharedFunctor.h"
+#include "core/debug/Debug.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/all.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/mpi/Broadcast.h"
+
+#include "domain_decomposition/SharedSweep.h"
+#include "domain_decomposition/BlockSweepWrapper.h"
+
+#include "field/AddToStorage.h"
+#include "field/StabilityChecker.h"
+#include "field/communication/PackInfo.h"
+
+#include "lbm/boundary/all.h"
+#include "lbm/communication/PdfFieldPackInfo.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/field/PdfField.h"
+#include "lbm/field/VelocityFieldWriter.h"
+#include "lbm/lattice_model/D3Q19.h"
+#include "lbm/refinement/all.h"
+#include "lbm/sweeps/CellwiseSweep.h"
+#include "lbm/sweeps/SweepWrappers.h"
+
+#include "pe/basic.h"
+#include "pe/Types.h"
+#include "pe/fcd/GJKEPACollideFunctor.h"
+#include "pe/vtk/BodyVtkOutput.h"
+#include "pe/vtk/SphereVtkOutput.h"
+#include "pe/vtk/EllipsoidVtkOutput.h"
+#include "pe/cr/ICR.h"
+#include "pe/synchronization/ClearSynchronization.h"
+#include "pe/amr/InfoCollection.h"
+#include "pe/amr/weight_assignment/WeightAssignmentFunctor.h"
+#include "pe/amr/weight_assignment/MetisAssignmentFunctor.h"
+
+#include "pe_coupling/amr/all.h"
+#include "pe_coupling/mapping/all.h"
+#include "pe_coupling/momentum_exchange_method/all.h"
+#include "pe_coupling/utility/all.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "vtk/all.h"
+#include "field/vtk/all.h"
+#include "lbm/vtk/all.h"
+
+namespace amr_sediment_settling
+{
+
+///////////
+// USING //
+///////////
+
+using namespace walberla;
+using walberla::uint_t;
+
+//////////////
+// TYPEDEFS //
+//////////////
+
+// PDF field, flag field & body field
+typedef lbm::D3Q19< lbm::collision_model::TRT, false>  LatticeModel_T;
+typedef LatticeModel_T::Stencil          Stencil_T;
+typedef lbm::PdfField< LatticeModel_T > PdfField_T;
+
+typedef walberla::uint8_t                 flag_t;
+typedef FlagField< flag_t >               FlagField_T;
+typedef GhostLayerField< pe::BodyID, 1 >  BodyField_T;
+typedef GhostLayerField< Vector3<real_t>, 1 >  VelocityField_T;
+
+const uint_t FieldGhostLayers = 4;
+
+// boundary handling
+typedef lbm::NoSlip< LatticeModel_T, flag_t > NoSlip_T;
+
+typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MO_T;
+
+typedef boost::tuples::tuple< NoSlip_T, MO_T > BoundaryConditions_T;
+typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
+
+typedef boost::tuple<pe::Sphere, pe::Ellipsoid, pe::Plane> BodyTypeTuple;
+
+///////////
+// FLAGS //
+///////////
+
+const FlagUID Fluid_Flag( "fluid" );
+const FlagUID NoSlip_Flag( "no slip" );
+const FlagUID MO_Flag( "moving obstacle" );
+const FlagUID FormerMO_Flag( "former moving obstacle" );
+
+
+//////////////////////////////////////
+// DYNAMIC REFINEMENT FUNCTIONALITY //
+//////////////////////////////////////
+
+
+/*
+ * Refinement check based on gradient magnitude
+ * If gradient magnitude is below lowerLimit in all cells of a block, that block could be coarsened.
+ * If the gradient value is above the upperLimit for at least one cell, that block gets marked for refinement.
+ * Else, the block remains on the current level.
+ */
+template< typename LatticeModel_T, typename Filter_T >
+class VectorGradientRefinement
+{
+public:
+   typedef GhostLayerField< Vector3<real_t>, 1 >  VectorField_T;
+   typedef typename LatticeModel_T::Stencil       Stencil_T;
+
+   VectorGradientRefinement( const ConstBlockDataID & fieldID, const Filter_T & filter,
+                             const real_t upperLimit, const real_t lowerLimit, const uint_t maxLevel ) :
+         fieldID_( fieldID ), filter_( filter ),
+         upperLimit_( upperLimit ), lowerLimit_( lowerLimit ), maxLevel_( maxLevel )
+   {}
+
+   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                    std::vector< const Block * > & blocksAlreadyMarkedForRefinement,
+                    const BlockForest & forest );
+
+private:
+
+   ConstBlockDataID fieldID_;
+
+   Filter_T filter_;
+
+   real_t upperLimit_;
+   real_t lowerLimit_;
+
+   uint_t maxLevel_;
+
+}; // class GradientRefinement
+
+template< typename LatticeModel_T, typename Filter_T >
+void VectorGradientRefinement< LatticeModel_T, Filter_T >::operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                                                                       std::vector< const Block * > &, const BlockForest & )
+{
+   for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
+   {
+      const Block * const block = it->first;
+
+      const uint_t currentLevelOfBlock = block->getLevel();
+
+      const VectorField_T * uField = block->template getData< VectorField_T >( fieldID_ );
+
+      if( uField == nullptr )
+      {
+         it->second = uint_t(0);
+         continue;
+      }
+
+      Matrix3<real_t> uGradient( real_t(0) );
+
+      bool refine( false );
+      bool coarsen( true );
+
+      filter_( *block );
+
+      WALBERLA_FOR_ALL_CELLS_XYZ( uField,
+
+          std::vector< Vector3<real_t> > uValues( Stencil_T::Size, Vector3<real_t>(real_t(0)) );
+
+          Vector3<real_t> uInCenterCell = uField->get( x,y,z );
+
+          for( auto dir = Stencil_T::beginNoCenter(); dir != Stencil_T::end(); ++dir)
+          {
+             // check if boundary treatment is necessary
+             if( filter_( x+dir.cx(),y+dir.cy(),z+dir.cz() ) )
+             {
+                // copy from center cell
+                uValues[ *dir ] = uInCenterCell;
+             } else {
+                uValues[ *dir ] = uField->get( x+dir.cx(),y+dir.cy(),z+dir.cz() );
+             }
+          }
+
+          // obtain the matrix grad(u) with the help of the gradient formula from
+          // See: Ramadugu et al - Lattice differential operators for computational physics (2013)
+          // with T = c_s**2
+          const auto inv_c_s_sqr = real_t(3);
+          uGradient = real_t(0);
+          for( auto dir = Stencil_T::beginNoCenter(); dir != Stencil_T::end(); ++dir)
+          {
+             real_t cx = real_c(dir.cx());
+             real_t cy = real_c(dir.cy());
+             real_t cz = real_c(dir.cz());
+
+             // grad(ux)
+             real_t ux = uValues[ *dir ][0];
+             uGradient[ 0 ] += LatticeModel_T::w[ dir.toIdx() ] * cx * ux;
+             uGradient[ 3 ] += LatticeModel_T::w[ dir.toIdx() ] * cy * ux;
+             uGradient[ 6 ] += LatticeModel_T::w[ dir.toIdx() ] * cz * ux;
+
+             // grad(uy)
+             real_t uy = uValues[ *dir ][1];
+             uGradient[ 1 ] += LatticeModel_T::w[ dir.toIdx() ] * cx * uy;
+             uGradient[ 4 ] += LatticeModel_T::w[ dir.toIdx() ] * cy * uy;
+             uGradient[ 7 ] += LatticeModel_T::w[ dir.toIdx() ] * cz * uy;
+
+             // grad(uz)
+             real_t uz = uValues[ *dir ][2];
+             uGradient[ 2 ] += LatticeModel_T::w[ dir.toIdx() ] * cx * uz;
+             uGradient[ 5 ] += LatticeModel_T::w[ dir.toIdx() ] * cy * uz;
+             uGradient[ 8 ] += LatticeModel_T::w[ dir.toIdx() ] * cz * uz;
+
+          }
+          uGradient *= inv_c_s_sqr;
+
+          auto norm = real_t(0);
+          //compute maximums norm of 3x3 matrix
+          for (auto i = uint_t(0); i < uint_t(3*3); ++i)
+             norm = std::max(norm, std::fabs(uGradient[i]));
+
+          if( norm > lowerLimit_ )
+          {
+             coarsen = false;
+             if( norm > upperLimit_ )
+                refine = true;
+          }
+
+      )
+
+      if( refine && currentLevelOfBlock < maxLevel_ )
+      {
+         WALBERLA_ASSERT( !coarsen );
+         it->second = currentLevelOfBlock + uint_t(1);
+      }
+      if( coarsen && currentLevelOfBlock > uint_t(0) )
+      {
+         WALBERLA_ASSERT( !refine );
+         it->second = currentLevelOfBlock - uint_t(1);
+      }
+   }
+}
+
+
+// Load estimators for spheres and ellipsoids, obtained at SuperMUC Phase 2
+// See Sec. 3 in the paper for more infos
+
+/////////////
+// Spheres //
+/////////////
+real_t fittedLBMWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Ce = blockInfo.numberOfCells;
+   uint_t F  = blockInfo.numberOfFluidCells;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t weight = real_t(9.990957667738165e-06) * real_c(Ce) + real_t(0.00015749920523711047) * real_c(F) + real_t(-0.08232498905584973);
+   return weight;
+}
+
+real_t fittedBHWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Ce = blockInfo.numberOfCells;
+   uint_t NB = blockInfo.numberOfNearBoundaryCells;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t weight = real_t(6.654810986939097e-06) * real_c(Ce) + real_t(0.0007061414693533274) * real_c(NB) + real_t(-0.1094292992294259);
+   return weight;
+}
+
+real_t fittedCoupling1WeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Ce = blockInfo.numberOfCells;
+   uint_t F  = blockInfo.numberOfFluidCells;
+   uint_t Pl = blockInfo.numberOfLocalBodies;
+   uint_t Ps = blockInfo.numberOfShadowBodies;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t weight = real_t(3.07542641675429e-06) * real_c(Ce) + real_t(2.419364600880769e-07) * real_c(F) + real_t(0.01413718259604757) * real_c(Pl) + real_t(0.027761707343462727) * real_c(Ps) + real_t(-0.13991481483939272);
+   return weight;
+}
+
+real_t fittedCoupling2WeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Ce = blockInfo.numberOfCells;
+   uint_t F  = blockInfo.numberOfFluidCells;
+   uint_t Pl = blockInfo.numberOfLocalBodies;
+   uint_t Ps = blockInfo.numberOfShadowBodies;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t weight = real_t(5.988401232749505e-06) * real_c(Ce) + real_t(3.903532223977357e-06) * real_c(F) + real_t(-0.008802674250816316) * real_c(Pl) + real_t(0.02505020738346139) * real_c(Ps) + real_t(-0.12970723676003335);
+   return weight;
+}
+
+real_t fittedPEWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Pl = blockInfo.numberOfLocalBodies;
+   uint_t Ps = blockInfo.numberOfShadowBodies;
+   uint_t Ct = blockInfo.numberOfContacts;
+   uint_t Sc = blockInfo.numberOfPeSubCycles;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t cPlPs2 = real_t(1.1562854544700417e-06);
+   real_t cPl    = real_t(0.0009620525068318354);
+   real_t cPs    = real_t(0.00027549401081063894);
+   real_t cCt    = real_t(0.0014801932788115464);
+   real_t c      = real_t(0.01883682418448259);
+   real_t weight = real_c(Sc) * ( cPlPs2 * real_c(Pl+Ps) * real_c(Pl+Ps) + cPl * real_c(Pl) + cPs * real_c(Ps) + cCt * real_c(Ct) + c );
+   return weight;
+}
+
+real_t fittedTotalWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo)
+{
+   return fittedLBMWeightEvaluationFunctionSpheres(blockInfo) + fittedBHWeightEvaluationFunctionSpheres(blockInfo) +
+          fittedCoupling1WeightEvaluationFunctionSpheres(blockInfo) + fittedCoupling2WeightEvaluationFunctionSpheres(blockInfo) +
+          fittedPEWeightEvaluationFunctionSpheres(blockInfo);
+}
+
+////////////////
+// Ellipsoids //
+////////////////
+real_t fittedLBMWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Ce = blockInfo.numberOfCells;
+   uint_t F  = blockInfo.numberOfFluidCells;
+   uint_t NB = blockInfo.numberOfNearBoundaryCells;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t cCe = real_t(4.69973868717e-05);
+   real_t cF  = real_t(0.000110568537442);
+   real_t weight = cCe * real_c(Ce) + cF * real_c(F);
+   if( NB > uint_t(0) ) weight += real_t(5.96551488486e-05) * real_c(Ce) + real_t(-5.75351782026e-05) * real_c(F) + real_t(0.000695800745231) * real_c(NB);
+   return weight;
+}
+
+real_t fittedCouplingWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Ce = blockInfo.numberOfCells;
+   uint_t F  = blockInfo.numberOfFluidCells;
+   uint_t Pl = blockInfo.numberOfLocalBodies;
+   uint_t Ps = blockInfo.numberOfShadowBodies;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t cCe = real_t(0.000176674935526);
+   real_t cF  = real_t(-0.000170513513027);
+   real_t cPl = real_t(0.0252031634776);
+   real_t cPs = real_t(0.0356835220918);
+   real_t weight = real_t(0);
+   if( (Pl + Ps ) > uint_t(0) ) weight = cCe * real_c(Ce) + cF * real_c(F) + cPl * real_c(Pl) + cPs * real_c(Ps);
+   return weight;
+}
+
+real_t fittedPEWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo)
+{
+   uint_t Pl = blockInfo.numberOfLocalBodies;
+   uint_t Ps = blockInfo.numberOfShadowBodies;
+   uint_t Ct = blockInfo.numberOfContacts;
+   uint_t Sc = blockInfo.numberOfPeSubCycles;
+
+   // from fits with optimized D3Q19 LBM kernel (no forces)
+   real_t cPlPs2 = real_t(8.24153555785e-06);
+   real_t cPl    = real_t(0.00135966650494);
+   real_t cPs    = real_t(0.00440464092538);
+   real_t cCt    = real_t(0.0216278259881);
+   real_t weight = real_c(Sc) * ( cPlPs2 * real_c(Pl+Ps) * real_c(Pl+Ps) + cPl * real_c(Pl) + cPs * real_c(Ps) + cCt * real_c(Ct) );
+   return weight;
+}
+
+real_t fittedTotalWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo)
+{
+   return fittedLBMWeightEvaluationFunctionEllipsoids(blockInfo) +
+          fittedCouplingWeightEvaluationFunctionEllipsoids(blockInfo) +
+          fittedPEWeightEvaluationFunctionEllipsoids(blockInfo);
+}
+
+
+struct TimingPoolLogger
+{
+   TimingPoolLogger( WcTimingPool & timingPool, const SweepTimeloop & timeloop, const uint_t interval )
+         : timingPool_( timingPool ), timeloop_( timeloop ), interval_( interval )
+   {
+   }
+
+   void operator()()
+   {
+      if( interval_ > uint_t(0) && timeloop_.getCurrentTimeStep() % interval_ == uint_t(0) )
+      {
+         timingPool_.logResultOnRoot();
+      }
+   }
+
+private:
+   WcTimingPool & timingPool_;
+   const SweepTimeloop & timeloop_;
+   uint_t interval_;
+};
+
+struct TimingTreeLogger
+{
+   TimingTreeLogger( WcTimingTree & timingTree, const SweepTimeloop & timeloop, const uint_t interval )
+         : timingTree_( timingTree ), timeloop_( timeloop ), interval_( interval )
+   {
+   }
+
+   void operator()()
+   {
+      if( interval_ > uint_t(0) && timeloop_.getCurrentTimeStep() % interval_ == uint_t(0) )
+      {
+         timingTree_.synchronize();
+         auto reducedTimingTree = timingTree_.getReduced();
+         WALBERLA_LOG_INFO_ON_ROOT( reducedTimingTree );
+      }
+   }
+
+private:
+   WcTimingTree & timingTree_;
+   const SweepTimeloop & timeloop_;
+   uint_t interval_;
+};
+
+/////////////////////
+// BLOCK STRUCTURE //
+/////////////////////
+
+static void refinementSelection( SetupBlockForest& forest, uint_t levels, const AABB & refinementBox )
+{
+   auto dx = real_t(1); // dx on finest level
+   for (auto &block : forest) {
+      uint_t blockLevel = block.getLevel();
+      uint_t levelScalingFactor = ( uint_t(1) << (levels - uint_t(1) - blockLevel) );
+      real_t dxOnLevel = dx * real_c(levelScalingFactor);
+      AABB blockAABB = block.getAABB();
+
+      // extend block AABB by ghostlayers
+      AABB extendedBlockAABB = blockAABB.getExtended( dxOnLevel * real_c(FieldGhostLayers) );
+
+      if( extendedBlockAABB.intersects( refinementBox ) )
+         if( blockLevel < ( levels - uint_t(1) ) )
+            block.setMarker( true );
+   }
+}
+
+static void workloadAndMemoryAssignment( SetupBlockForest& forest )
+{
+   for (auto &block : forest) {
+      block.setWorkload( numeric_cast< workload_t >( uint_t(1) << block.getLevel() ) );
+      block.setMemory( numeric_cast< memory_t >(1) );
+   }
+}
+
+static shared_ptr< StructuredBlockForest > createBlockStructure( const AABB & domainAABB, Vector3<uint_t> blockSizeInCells,
+                                                                 uint_t numberOfLevels, const AABB & refinementBox,
+                                                                 bool useBox, const std::string & loadDistributionStrategy,
+                                                                 bool keepGlobalBlockInformation = false )
+{
+   SetupBlockForest sforest;
+
+   Vector3<uint_t> numberOfFineBlocksPerDirection( uint_c(domainAABB.size(0)) / blockSizeInCells[0],
+                                                   uint_c(domainAABB.size(1)) / blockSizeInCells[1],
+                                                   uint_c(domainAABB.size(2)) / blockSizeInCells[2] );
+
+   for(uint_t i = 0; i < 3; ++i )
+   {
+      WALBERLA_CHECK_EQUAL( numberOfFineBlocksPerDirection[i] * blockSizeInCells[i], uint_c(domainAABB.size(i)),
+                            "Domain can not be decomposed in direction " << i << " into fine blocks of size " << blockSizeInCells[i] );
+   }
+
+   uint_t levelScalingFactor = ( uint_t(1) << ( numberOfLevels - uint_t(1) ) );
+   Vector3<uint_t> numberOfCoarseBlocksPerDirection( numberOfFineBlocksPerDirection / levelScalingFactor );
+
+   for(uint_t i = 0; i < 3; ++i )
+   {
+      WALBERLA_CHECK_EQUAL(numberOfCoarseBlocksPerDirection[i] * levelScalingFactor, numberOfFineBlocksPerDirection[i],
+                            "Domain can not be refined in direction " << i << " according to the specified number of levels!" );
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT(" - refinement box: " << refinementBox);
+
+   MPIManager::instance()->useWorldComm();
+
+   sforest.addRefinementSelectionFunction( std::bind( refinementSelection, std::placeholders::_1, numberOfLevels, refinementBox ) );
+   sforest.addWorkloadMemorySUIDAssignmentFunction( workloadAndMemoryAssignment );
+
+   Vector3<bool> periodicity( true, true, false);
+   if( useBox )
+   {
+      periodicity[0] = false;
+      periodicity[1] = false;
+   }
+   sforest.init( domainAABB,
+                 numberOfCoarseBlocksPerDirection[0], numberOfCoarseBlocksPerDirection[1], numberOfCoarseBlocksPerDirection[2],
+                 periodicity[0], periodicity[1], periodicity[2]);
+
+   // calculate process distribution
+   const memory_t memoryLimit = math::Limits< memory_t >::inf();
+
+   if( loadDistributionStrategy == "Hilbert" )
+   {
+      bool useHilbert = true;
+      sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(useHilbert), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true );
+   } else if ( loadDistributionStrategy == "Morton" )
+   {
+      bool useHilbert = false;
+      sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(useHilbert), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true );
+   } else if ( loadDistributionStrategy == "ParMetis" )
+   {
+      blockforest::StaticLevelwiseParMetis::Algorithm algorithm = blockforest::StaticLevelwiseParMetis::Algorithm::PARMETIS_PART_GEOM_KWAY;
+      blockforest::StaticLevelwiseParMetis staticParMetis(algorithm);
+      sforest.balanceLoad( staticParMetis, uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true );
+   } else if (loadDistributionStrategy == "Diffusive" )
+   {
+      // also use Hilbert curve here
+      bool useHilbert = true;
+      sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(useHilbert), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true );
+   } else
+   {
+      WALBERLA_ABORT("Load distribution strategy \"" << loadDistributionStrategy << "\t not implemented! - Aborting" );
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT( sforest );
+
+
+   // create StructuredBlockForest (encapsulates a newly created BlockForest)
+   shared_ptr< StructuredBlockForest > sbf =
+         make_shared< StructuredBlockForest >( make_shared< BlockForest >( uint_c( MPIManager::instance()->rank() ), sforest, keepGlobalBlockInformation ),
+                                               blockSizeInCells[0], blockSizeInCells[1], blockSizeInCells[2]);
+   sbf->createCellBoundingBoxes();
+
+   return sbf;
+}
+
+/////////////////////////////////////
+// BOUNDARY HANDLING CUSTOMIZATION //
+/////////////////////////////////////
+class MyBoundaryHandling : public blockforest::AlwaysInitializeBlockDataHandling< BoundaryHandling_T >
+{
+public:
+   MyBoundaryHandling( const weak_ptr< StructuredBlockStorage > & blocks,
+                       const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID ) :
+         blocks_( blocks ), flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID )
+   {}
+
+   BoundaryHandling_T * initialize( IBlock * const block ) override;
+
+private:
+
+   weak_ptr< StructuredBlockStorage > blocks_;
+
+   const BlockDataID flagFieldID_;
+   const BlockDataID pdfFieldID_;
+   const BlockDataID bodyFieldID_;
+
+
+}; // class MyBoundaryHandling
+
+BoundaryHandling_T * MyBoundaryHandling::initialize( IBlock * const block )
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( block );
+
+   auto * flagField = block->getData< FlagField_T >( flagFieldID_ );
+   auto *  pdfField = block->getData< PdfField_T > ( pdfFieldID_ );
+   auto * bodyField = block->getData< BodyField_T >( bodyFieldID_ );
+
+   const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
+
+   auto blocksPtr = blocks_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR( blocksPtr );
+
+   BoundaryHandling_T * handling = new BoundaryHandling_T( "moving obstacle boundary handling", flagField, fluid,
+                                                           boost::tuples::make_tuple( NoSlip_T( "NoSlip", NoSlip_Flag, pdfField ),
+                                                                                      MO_T( "MO", MO_Flag, pdfField, flagField, bodyField, fluid, *blocksPtr, *block ) ),
+                                                           BoundaryHandling_T::Mode::ENTIRE_FIELD_TRAVERSAL);
+
+   handling->fillWithDomain( FieldGhostLayers );
+
+   return handling;
+}
+
+
+//*******************************************************************************************************************
+
+
+//*******************************************************************************************************************
+/*!\brief Evaluating the position and velocity of the sediments
+ *
+ */
+//*******************************************************************************************************************
+class PropertyLogger
+{
+public:
+   PropertyLogger( SweepTimeloop* timeloop, const shared_ptr< StructuredBlockStorage > & blocks,
+                   const BlockDataID & bodyStorageID, const std::string & fileName, bool fileIO) :
+      timeloop_( timeloop ), blocks_( blocks ), bodyStorageID_( bodyStorageID ), fileName_( fileName ), fileIO_(fileIO),
+      meanPos_( real_t(0) ), meanVel_( real_t(0) ), maxVel_( real_t(0) )
+   {
+      if ( fileIO_ )
+      {
+         WALBERLA_ROOT_SECTION()
+         {
+            std::ofstream file;
+            file.open( fileName_.c_str() );
+            file << "#\t pos\t vel\t maxVel\n";
+            file.close();
+         }
+      }
+   }
+
+   void operator()()
+   {
+      const uint_t timestep (timeloop_->getCurrentTimeStep() );
+
+      auto numSediments = uint_t(0);
+      auto meanPos = real_t(0);
+      auto meanVel = real_t(0);
+      auto maxVel = real_t(0);
+
+      for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            meanPos += bodyIt->getPosition()[2];
+            meanVel += bodyIt->getLinearVel()[2];
+            maxVel = std::max(maxVel, std::fabs(bodyIt->getLinearVel()[2]));
+            ++numSediments;
+         }
+      }
+
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace( numSediments, mpi::SUM );
+         mpi::allReduceInplace( meanPos, mpi::SUM );
+         mpi::allReduceInplace( meanVel, mpi::SUM );
+         mpi::allReduceInplace( maxVel, mpi::MAX );
+      }
+
+      meanPos /= real_c(numSediments);
+      meanVel /= real_c(numSediments);
+
+      meanPos_ = meanPos;
+      meanVel_ = meanVel;
+      maxVel_ = maxVel;
+
+      if( fileIO_ )
+         writeToFile( timestep );
+   }
+
+   real_t getMeanPosition() const
+   {
+      return meanPos_;
+   }
+
+   real_t getMaxVelocity() const
+   {
+      return maxVel_;
+   }
+
+   real_t getMeanVelocity() const
+   {
+      return meanVel_;
+   }
+
+
+private:
+   void writeToFile( uint_t timestep )
+   {
+      WALBERLA_ROOT_SECTION()
+      {
+         std::ofstream file;
+         file.open( fileName_.c_str(), std::ofstream::app );
+
+         file << timestep << "\t" << meanPos_ << "\t" << meanVel_ << "\t" << maxVel_ << "\n";
+         file.close();
+      }
+   }
+
+   SweepTimeloop* timeloop_;
+   shared_ptr< StructuredBlockStorage > blocks_;
+   const BlockDataID bodyStorageID_;
+   std::string fileName_;
+   bool fileIO_;
+
+   real_t meanPos_;
+   real_t meanVel_;
+   real_t maxVel_;
+};
+
+void clearBoundaryHandling( BlockForest & forest, const BlockDataID & boundaryHandlingID )
+{
+   for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt )
+   {
+      auto * boundaryHandling = blockIt->getData<BoundaryHandling_T>(boundaryHandlingID);
+      boundaryHandling->clear( FieldGhostLayers );
+   }
+}
+
+void clearBodyField( BlockForest & forest, const BlockDataID & bodyFieldID )
+{
+   for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt )
+   {
+      auto * bodyField = blockIt->getData<BodyField_T>(bodyFieldID);
+      bodyField->setWithGhostLayer( NULL );
+   }
+}
+
+void recreateBoundaryHandling( BlockForest & forest, const BlockDataID & boundaryHandlingID )
+{
+   for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt )
+   {
+      auto * boundaryHandling = blockIt->getData<BoundaryHandling_T>(boundaryHandlingID);
+      boundaryHandling->fillWithDomain( FieldGhostLayers );
+   }
+}
+
+
+class TimingEvaluator
+{
+public:
+   TimingEvaluator( WcTimingPool & levelwiseTimingPool, WcTimingTree & peTimingTree, uint_t numberOfLevels)
+   : levelwiseTimingPool_( levelwiseTimingPool ), peTimingTree_( peTimingTree ), numberOfLevels_( numberOfLevels )
+   {}
+
+   real_t getTimings(const std::vector<std::string> & timerNames, uint_t level )
+   {
+
+      auto timing = real_t(0);
+      for (const auto &timerName : timerNames)
+      {
+         std::string timerNameLvlWise = timerName;// +
+         // put level between timer string and possible suffix
+         auto suffixBegin = timerNameLvlWise.find_first_of('[');
+         if( suffixBegin != std::string::npos)
+         {
+            // suffix detected
+            auto suffixEnd = timerNameLvlWise.find_last_of(']');
+            if( suffixEnd != std::string::npos)
+            {
+               auto timerString = timerNameLvlWise.substr(0,suffixBegin);
+               auto suffixString = timerNameLvlWise.substr(suffixBegin,suffixEnd-suffixBegin+1);
+
+               timerNameLvlWise = timerString + "(" + std::to_string(level) + ") " + suffixString; // NOLINT
+
+            }
+            else
+            {
+               WALBERLA_ABORT("Invalid timer string");
+            }
+         }
+         else
+         {
+            timerNameLvlWise += " (" + std::to_string(level) + ")";;
+         }
+
+         if( levelwiseTimingPool_.timerExists(timerNameLvlWise))
+            timing += levelwiseTimingPool_[timerNameLvlWise].total();
+
+         if( level == numberOfLevels_- 1)
+         {
+            if( peTimingTree_.timerExists(timerName))
+               timing += peTimingTree_[timerName].total();
+         }
+      }
+
+      return timing;
+   }
+
+
+private:
+
+   WcTimingPool & levelwiseTimingPool_;
+   WcTimingTree & peTimingTree_;
+   uint_t numberOfLevels_;
+};
+
+
+real_t weightEvaluation(BlockForest & forest,
+                        const shared_ptr<pe_coupling::InfoCollection>& couplingInfoCollection,
+                        const shared_ptr<pe::InfoCollection> & peInfoCollection,
+                        real_t peBlockBaseWeight,
+                        const std::string & loadEvaluationStrategy,
+                        uint_t level,
+                        bool useEllipsoids )
+{
+   auto weight = real_t(0);
+   for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt )
+   {
+      if( forest.getLevel(*blockIt) != level) continue;
+
+
+      auto * block = static_cast<blockforest::Block*> (&(*blockIt));
+      const auto &blockID = block->getId();
+
+      if(loadEvaluationStrategy == "LBM")
+      {
+         auto infoIt = couplingInfoCollection->find( blockID );
+         weight += pe_coupling::amr::defaultWeightEvaluationFunction(infoIt->second);
+
+      }else if(loadEvaluationStrategy == "PE")
+      {
+         auto infoIt = peInfoCollection->find( blockID );
+         weight += real_c(infoIt->second.numberOfLocalBodies) + peBlockBaseWeight;
+      }else if(loadEvaluationStrategy == "Fit" || loadEvaluationStrategy == "FitMulti")
+      {
+         auto infoIt = couplingInfoCollection->find( blockID );
+         if( useEllipsoids )
+         {
+            weight += fittedTotalWeightEvaluationFunctionEllipsoids(infoIt->second);
+         }
+         else
+         {
+            weight += fittedTotalWeightEvaluationFunctionSpheres(infoIt->second);
+         }
+      }else
+      {
+         WALBERLA_ABORT("Load balancing strategy not defined");
+      }
+   }
+   return weight;
+}
+
+
+uint_t evaluateEdgeCut(BlockForest & forest)
+{
+
+   //note: only works for edges in uniform grids
+
+   auto edgecut = uint_t(0); // = edge weights between processes
+
+   for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt )
+   {
+      auto * block = static_cast<blockforest::Block*> (&(*blockIt));
+
+      real_t blockVolume = block->getAABB().volume();
+      real_t approximateEdgeLength = std::cbrt( blockVolume );
+
+      uint_t faceNeighborWeight = uint_c(approximateEdgeLength * approximateEdgeLength ); //common face
+      uint_t edgeNeighborWeight = uint_c(approximateEdgeLength); //common edge
+      uint_t cornerNeighborWeight = uint_c( 1 ); //common corner
+
+
+      for( const uint_t idx : blockforest::getFaceNeighborhoodSectionIndices() )
+      {
+         for (auto nb = uint_t(0); nb < block->getNeighborhoodSectionSize(idx); ++nb)
+         {
+            if( block->neighborExistsRemotely(idx,nb) ) edgecut += faceNeighborWeight;
+         }
+      }
+
+      for( const uint_t idx : blockforest::getEdgeNeighborhoodSectionIndices() )
+      {
+         for (auto nb = uint_t(0); nb < block->getNeighborhoodSectionSize(idx); ++nb)
+         {
+            if( block->neighborExistsRemotely(idx,nb) ) edgecut += edgeNeighborWeight;
+         }
+      }
+
+      for( const uint_t idx : blockforest::getCornerNeighborhoodSectionIndices() )
+      {
+         for (auto nb = uint_t(0); nb < block->getNeighborhoodSectionSize(idx); ++nb)
+         {
+            if( block->neighborExistsRemotely(idx,nb) ) edgecut += cornerNeighborWeight;
+         }
+      }
+   }
+   return edgecut;
+}
+
+
+void evaluateTotalSimulationTimePassed(WcTimingPool & timeloopTimingPool, real_t & totalSimTime, real_t & totalLBTime)
+{
+   shared_ptr< WcTimingPool> reduced = timeloopTimingPool.getReduced(WcTimingPool::REDUCE_TOTAL, 0);
+
+   std::string simulationString("LBM refinement time step");
+   auto totalTime = real_t(0);
+   WALBERLA_ROOT_SECTION(){
+      totalTime = (*reduced)[simulationString].total();
+   }
+   totalSimTime = totalTime;
+
+   std::string lbString("refinement checking");
+   auto lbTime = real_t(0);
+   WALBERLA_ROOT_SECTION(){
+      lbTime = (*reduced)[lbString].total();
+   }
+   totalLBTime = lbTime;
+
+}
+
+void createSedimentLayer(uint_t numberOfSediments, const AABB & generationDomain, real_t diameter, real_t heightBorder,
+                         pe::MaterialID peMaterial,
+                         pe::cr::HCSITS & cr, const std::function<void(void)> & syncCall,
+                         const shared_ptr< StructuredBlockForest > & blocks,
+                         const shared_ptr<pe::BodyStorage> & globalBodyStorage, BlockDataID bodyStorageID,
+                         real_t gravitationalAcceleration, bool useEllipsoids, bool shortRun)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Starting creation of sediments");
+
+   auto xParticle = real_t(0);
+   auto yParticle = real_t(0);
+   auto zParticle = real_t(0);
+
+   for( uint_t nSed = 0; nSed < numberOfSediments; ++nSed )
+   {
+
+      WALBERLA_ROOT_SECTION()
+      {
+         xParticle = math::realRandom<real_t>(generationDomain.xMin(), generationDomain.xMax());
+         yParticle = math::realRandom<real_t>(generationDomain.yMin(), generationDomain.yMax());
+         zParticle = math::realRandom<real_t>(generationDomain.zMin(), generationDomain.zMax());
+      }
+
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::broadcastObject( xParticle );
+         mpi::broadcastObject( yParticle );
+         mpi::broadcastObject( zParticle );
+      }
+
+      if( useEllipsoids )
+      {
+         // prolate ellipsoids
+         auto axisFactor = real_t(1.5);
+         real_t axisFactor2 = std::sqrt(real_t(1)/axisFactor);
+         real_t radius = diameter * real_t(0.5);
+         pe::createEllipsoid( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), Vector3<real_t>(axisFactor*radius, axisFactor2*radius, axisFactor2*radius), peMaterial );
+      }
+      else
+      {
+         pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), diameter * real_t(0.5), peMaterial );
+      }
+
+   }
+
+   syncCall();
+
+   // carry out 100 simulations to resolve all overlaps
+   for (auto pet = uint_t(1); pet <= uint_t(100); ++pet)
+   {
+      cr.timestep( real_t(1) );
+      syncCall();
+
+      // reset all velocities to zero
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->setLinearVel(Vector3<real_t>(real_t(0)));
+            bodyIt->setAngularVel(Vector3<real_t>(real_t(0)));
+         }
+      }
+   }
+
+
+   const auto maxInitialPeSteps = (shortRun) ? uint_t(10) : uint_t(200000);
+   const auto dt_PE_init = real_t(1);
+
+   real_t gravityGeneration = real_t(0.1) * gravitationalAcceleration;
+   cr.setGlobalLinearAcceleration(Vector3<real_t>(real_t(0), real_t(0), gravityGeneration));
+
+   auto oldMinBodyPosition = real_t(0);
+   real_t convergenceLimit = std::fabs(gravityGeneration);
+   for (auto pet = uint_t(1); pet <= maxInitialPeSteps; ++pet)
+   {
+      cr.timestep( dt_PE_init );
+      syncCall();
+
+      real_t minBodyPosition = generationDomain.zMax();
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            minBodyPosition = std::min(bodyIt->getPosition()[2], minBodyPosition);
+         }
+      }
+
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace(minBodyPosition, mpi::MIN);
+      }
+
+      if( minBodyPosition > heightBorder ) break;
+
+      if( pet % 500 == 0)
+      {
+         if( std::fabs(minBodyPosition - oldMinBodyPosition) / minBodyPosition  < convergenceLimit ) break;
+         oldMinBodyPosition = minBodyPosition;
+      }
+
+      WALBERLA_ROOT_SECTION()
+      {
+         if( pet % 100 == 0)
+         {
+            WALBERLA_LOG_INFO("[" << pet << "] Min position of all bodies = " << minBodyPosition << " with goal height " << heightBorder);
+         }
+      }
+
+   }
+
+   // revert gravitational acceleration to 'real' direction
+   cr.setGlobalLinearAcceleration(Vector3<real_t>(real_t(0), real_t(0), -gravityGeneration));
+
+   // carry out a few time steps to relax the system towards the real condition
+   const auto relaxationTimeSteps = uint_t(std::sqrt(real_t(2)/std::fabs(gravitationalAcceleration)));
+   WALBERLA_LOG_INFO_ON_ROOT("Carrying out " << relaxationTimeSteps << " more time steps with correct gravity");
+   for (auto pet = uint_t(1); pet <= relaxationTimeSteps; ++pet)
+   {
+      cr.timestep(dt_PE_init);
+      syncCall();
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("Sediment layer creation done!");
+
+   // reset all velocities to zero
+   Vector3<real_t> initialBodyVelocity(real_t(0));
+   WALBERLA_LOG_INFO_ON_ROOT("Setting initial velocity " << initialBodyVelocity << " of all bodies");
+   for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+   {
+      for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+      {
+         bodyIt->setLinearVel(initialBodyVelocity);
+         bodyIt->setAngularVel(Vector3<real_t>(real_t(0)));
+      }
+   }
+
+   cr.setGlobalLinearAcceleration(Vector3<real_t>(real_t(0)));
+}
+
+
+//*******************************************************************************************************************
+/*!\brief Simulation of settling particles inside a rectangular column filled with viscous fluid
+ *
+ * This application is used in the paper
+ *  Rettinger, Ruede - "Dynamic Load Balancing Techniques for Particulate Flow Simulations", submitted to Computation
+ * in Section 4 to apply the load estimator and to evaluate different load distribution strategies.
+ *
+ * It, however, features several different command line arguments that can be used to tweak the simulation.
+ * The setup can be horizontally period, a box or a hopper geometry (configurable, as in the paper).
+ * The size, resolution and used blocks for the domain partitioning can be changed.
+ * It even features adaptive mesh refinement, with different refinement criteria:
+ *  - particle based (always on, also for global bodies like bounding planes)
+ *  - optionally: vorticity- or gradient-based (with lower and upper limits)
+ * Since the paper, however, uses a uniform grid, many evaluation functionalities might not work properly for this case.
+ * Initially, all particles are pushed upwards to obtain a dense packing at the top plane.
+ *
+ * Most importantly, the load balancing can be modified:
+ *  - load estimation strategies:
+ *    - pure LBM = number of cells per block = constant workload per block
+ *    - pure PE = number of local particles + baseweight
+ *    - coupling based load estimator = use fitted function from Sec. 3 of paper
+ *  - load distribution strategies:
+ *    - space-filling curves: Hilbert and Morton
+ *    - ParMETIS (and several algorithms and parameters, also multiple constraints possible)
+ *    - diffusive (and options)
+ *  - load balancing (/refinement check ) frequency
+ */
+//*******************************************************************************************************************
+int main( int argc, char **argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+   ///////////////////
+   // Customization //
+   ///////////////////
+
+   // simulation control
+   bool shortRun = false;
+   bool funcTest = false;
+   bool fileIO = true;
+   bool logging = false; // logging of physical components
+   uint_t vtkWriteFreqDD = 0; //domain decomposition
+   uint_t vtkWriteFreqBo = 0; //bodies
+   uint_t vtkWriteFreqFl = 0; //fluid
+   uint_t vtkWriteFreq = 0; //general
+   std::string baseFolder = "vtk_out_AMRSedimentSettling"; // folder for vtk and file output
+
+   // physical setup
+   auto GalileoNumber = real_t(50);
+   auto densityRatio = real_t(1.5);
+   auto diameter = real_t(15);
+   auto solidVolumeFraction = real_t(0.1);
+   auto blockSize = uint_t(32);
+   auto XBlocks = uint_t(12);
+   auto YBlocks = uint_t(12);
+   auto ZBlocks = uint_t(16);
+   bool useBox = false;
+   bool useHopper = false;
+   bool useEllipsoids = false;
+   auto hopperRelHeight = real_t(0.5); // for hopper setup
+   auto hopperRelOpening = real_t(0.3); // for hopper setup
+
+   auto timestepsOnFinestLevel = uint_t(80000);
+
+   //numerical parameters
+   bool averageForceTorqueOverTwoTimSteps = true;
+   auto numberOfLevels = uint_t(1);
+   auto refinementCheckFrequency = uint_t(100);
+   auto numPeSubCycles = uint_t(10);
+
+   // refinement criteria
+   auto lowerFluidRefinementLimit = real_t(0);
+   auto upperFluidRefinementLimit = std::numeric_limits<real_t>::infinity();
+   bool useVorticityCriterion = false;
+   bool useGradientCriterion = false;
+
+   // load balancing
+   std::string loadEvaluationStrategy = "LBM"; //LBM, PE, Fit
+   std::string loadDistributionStrategy = "Hilbert"; //Morton, Hilbert, ParMetis, Diffusive
+
+   auto parMetis_ipc2redist = real_t(1000);
+   auto parMetisTolerance = real_t(-1);
+   std::string parMetisAlgorithmString = "ADAPTIVE_REPART";
+
+   auto diffusionFlowIterations = uint_t(15);
+   auto diffusionMaxIterations = uint_t(20);
+
+
+   for( int i = 1; i < argc; ++i )
+   {
+      if( std::strcmp( argv[i], "--shortRun" )                 == 0 ) { shortRun = true; continue; }
+      if( std::strcmp( argv[i], "--funcTest" )                 == 0 ) { funcTest = true; continue; }
+      if( std::strcmp( argv[i], "--fileIO" )                   == 0 ) { fileIO = true; continue; }
+      if( std::strcmp( argv[i], "--logging" )                  == 0 ) { logging = true; continue; }
+      if( std::strcmp( argv[i], "--vtkWriteFreqDD" )           == 0 ) { vtkWriteFreqDD = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--vtkWriteFreqBo" )           == 0 ) { vtkWriteFreqBo = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--vtkWriteFreqFl" )           == 0 ) { vtkWriteFreqFl = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--vtkWriteFreq" )             == 0 ) { vtkWriteFreq = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--baseFolder" )               == 0 ) { baseFolder = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--densityRatio" )             == 0 ) { densityRatio = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--Ga" )                       == 0 ) { GalileoNumber = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--diameter" )                 == 0 ) { diameter = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--blockSize" )                == 0 ) { blockSize = uint_c(std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--XBlocks" )                  == 0 ) { XBlocks = uint_c(std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--YBlocks" )                  == 0 ) { YBlocks = uint_c(std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--ZBlocks" )                  == 0 ) { ZBlocks = uint_c(std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--useBox" )                   == 0 ) { useBox = true; continue; }
+      if( std::strcmp( argv[i], "--useHopper" )                == 0 ) { useHopper = true; continue; }
+      if( std::strcmp( argv[i], "--hopperHeight" )             == 0 ) { hopperRelHeight = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--hopperOpening" )            == 0 ) { hopperRelOpening = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--timesteps" )                == 0 ) { timestepsOnFinestLevel = uint_c(std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--noForceAveraging" )         == 0 ) { averageForceTorqueOverTwoTimSteps = false; continue; }
+      if( std::strcmp( argv[i], "--numPeSubCycles" )           == 0 ) { numPeSubCycles = uint_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--numLevels" )                == 0 ) { numberOfLevels = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--refinementCheckFrequency" ) == 0 ) { refinementCheckFrequency = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--lowerLimit" )               == 0 ) { lowerFluidRefinementLimit = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--upperLimit" )               == 0 ) { upperFluidRefinementLimit = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--useVorticityCriterion" )    == 0 ) { useVorticityCriterion = true; continue; }
+      if( std::strcmp( argv[i], "--useGradientCriterion" )     == 0 ) { useGradientCriterion = true; continue; }
+      if( std::strcmp( argv[i], "--loadEvaluationStrategy" )   == 0 ) { loadEvaluationStrategy = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--loadDistributionStrategy" ) == 0 ) { loadDistributionStrategy = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--ipc2redist" )               == 0 ) { parMetis_ipc2redist = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--parMetisTolerance" )        == 0 ) { parMetisTolerance = std::atof( argv[++i] ); continue; }
+      if( std::strcmp( argv[i], "--parMetisAlgorithm" )        == 0 ) { parMetisAlgorithmString = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--diffusionFlowIterations" )  == 0 ) { diffusionFlowIterations = uint_c(std::atof(argv[++i])); continue; }
+      if( std::strcmp( argv[i], "--diffusionMaxIterations" )   == 0 ) { diffusionMaxIterations = uint_c(std::atof(argv[++i])); continue; }
+      if( std::strcmp( argv[i], "--useEllipsoids" )            == 0 ) { useEllipsoids = true; continue; }
+      WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
+   }
+
+   if( funcTest )
+   {
+      walberla::logging::Logging::instance()->setLogLevel(logging::Logging::LogLevel::WARNING);
+   }
+
+   if( fileIO || logging )
+   {
+      WALBERLA_ROOT_SECTION(){
+         // create base directory if it does not yet exist
+         filesystem::path tpath( baseFolder );
+         if( !filesystem::exists( tpath ) )
+            filesystem::create_directory( tpath );
+      }
+   }
+
+   if( useVorticityCriterion && useGradientCriterion )
+   {
+      WALBERLA_ABORT("Use either vorticity or gradient criterion for refinement!");
+   }
+
+   if( loadEvaluationStrategy != "LBM" && loadEvaluationStrategy != "PE" && loadEvaluationStrategy != "Fit" && loadEvaluationStrategy != "FitMulti")
+   {
+      WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy);
+   }
+
+   if( vtkWriteFreq != 0 )
+   {
+      vtkWriteFreqDD = vtkWriteFreq;
+      vtkWriteFreqBo = vtkWriteFreq;
+      vtkWriteFreqFl = vtkWriteFreq;
+   }
+
+   if( diameter > real_c(blockSize) )
+   {
+      WALBERLA_LOG_WARNING("PE Body Synchronization might not work since bodies are large compared to block size!");
+   }
+
+   if( useHopper )
+   {
+      WALBERLA_CHECK(hopperRelHeight >= real_t(0) && hopperRelHeight <= real_t(1), "Invalid relative hopper height of " << hopperRelHeight);
+      WALBERLA_CHECK(hopperRelOpening >= real_t(0) && hopperRelOpening <= real_t(1), "Invalid relative hopper opening of " << hopperRelOpening);
+   }
+
+
+   //////////////////////////
+   // NUMERICAL PARAMETERS //
+   //////////////////////////
+
+   const Vector3<uint_t> domainSize( XBlocks * blockSize, YBlocks * blockSize, ZBlocks * blockSize );
+   const auto domainVolume = real_t(domainSize[0] * domainSize[1] * domainSize[2]);
+   const real_t sphereVolume = math::M_PI / real_t(6) * diameter * diameter * diameter;
+   const uint_t numberOfSediments = uint_c(std::ceil(solidVolumeFraction * domainVolume / sphereVolume));
+
+   real_t expectedSedimentVolumeFraction = (useBox||useHopper) ? real_t(0.45) : real_t(0.52);
+   const real_t expectedSedimentedVolume = real_t(1)/expectedSedimentVolumeFraction * real_c(numberOfSediments) * sphereVolume;
+   const real_t expectedSedimentedHeight = std::max(diameter, expectedSedimentedVolume / real_c(domainSize[0] * domainSize[1]));
+
+   const auto uRef = real_t(0.02);
+   const real_t xRef = diameter;
+   const real_t tRef = xRef / uRef;
+
+   const real_t gravitationalAcceleration = uRef * uRef / ( (densityRatio-real_t(1)) * diameter );
+   const real_t viscosity = uRef * diameter / GalileoNumber;
+   const real_t omega = lbm::collision_model::omegaFromViscosity(viscosity);
+   const real_t tau = real_t(1) / omega;
+
+   const auto loggingDisplayFrequency = uint_t(100);
+
+   const auto dx = real_t(1);
+   const real_t overlap = real_t( 1.5 ) * dx;
+
+   if( useVorticityCriterion && floatIsEqual(lowerFluidRefinementLimit, real_t(0)) && std::isinf(upperFluidRefinementLimit) )
+   {
+      // use computed criterion instead of user input
+      lowerFluidRefinementLimit = real_t(0.05) * uRef;
+      upperFluidRefinementLimit = real_t(0.1) * uRef;
+   }
+
+   const uint_t finestLevel = numberOfLevels - uint_t(1);
+   std::stringstream omega_msg;
+   for( uint_t i = 0; i < numberOfLevels; ++i )
+   {
+      real_t omegaLvl = lbm::collision_model::levelDependentRelaxationParameter( i, omega, finestLevel );
+      omega_msg << omegaLvl << " ( on level " << i << ", tau = " << real_t(1)/omega << " ), ";
+   }
+
+   const uint_t levelScalingFactor = ( uint_t(1) << finestLevel );
+   const uint_t lbmTimeStepsPerTimeLoopIteration = levelScalingFactor;
+
+   const uint_t timesteps = funcTest ? 1 : ( shortRun ? uint_t(100) : uint_t( timestepsOnFinestLevel / lbmTimeStepsPerTimeLoopIteration ) );
+
+   WALBERLA_LOG_INFO_ON_ROOT("Setup (in simulation, i.e. lattice, units):");
+   WALBERLA_LOG_INFO_ON_ROOT(" - domain size = " << domainSize);
+   WALBERLA_LOG_INFO_ON_ROOT(" - sediment diameter = " << diameter );
+   WALBERLA_LOG_INFO_ON_ROOT(" - Galileo number = " << GalileoNumber );
+   WALBERLA_LOG_INFO_ON_ROOT(" - number of sediments: " << numberOfSediments);
+   WALBERLA_LOG_INFO_ON_ROOT(" - densityRatio = " << densityRatio );
+   WALBERLA_LOG_INFO_ON_ROOT(" - fluid: relaxation time (tau) = " << tau << ", kin. visc = " << viscosity );
+   WALBERLA_LOG_INFO_ON_ROOT(" - gravitational acceleration = " << gravitationalAcceleration );
+   WALBERLA_LOG_INFO_ON_ROOT(" - reference values: x = " << xRef << ", t = " << tRef << ", vel = " << uRef);
+   WALBERLA_LOG_INFO_ON_ROOT(" - omega: " << omega_msg.str());
+   WALBERLA_LOG_INFO_ON_ROOT(" - number of levels: " << numberOfLevels);
+   WALBERLA_LOG_INFO_ON_ROOT(" - number of pe sub cycles: " << numPeSubCycles);
+   if( useVorticityCriterion )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - using vorticity criterion with lower limit = " << lowerFluidRefinementLimit << " and upper limit = " << upperFluidRefinementLimit );
+   }
+   if( useGradientCriterion )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - using gradient criterion with lower limit = " << lowerFluidRefinementLimit << " and upper limit = " << upperFluidRefinementLimit );
+   }
+   if( vtkWriteFreqDD > 0 )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - writing vtk files of domain decomposition to folder \"" << baseFolder << "\" with frequency " << vtkWriteFreqDD);
+   }
+   if( vtkWriteFreqBo > 0 )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - writing vtk files of bodies data to folder \"" << baseFolder << "\" with frequency " << vtkWriteFreqBo);
+   }
+   if( vtkWriteFreqFl > 0 )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - writing vtk files of fluid data to folder \"" << baseFolder << "\" with frequency " << vtkWriteFreqFl);
+   }
+   if( useEllipsoids )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - using (prolate) ellipsoids as sediments");
+   }
+   if( useBox )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - using box setup");
+   }
+   else if ( useHopper )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - using hopper setup");
+   }
+   else
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - using horizontally periodic domain");
+   }
+
+
+
+   if( refinementCheckFrequency == 0 && numberOfLevels != 1 )
+   {
+      // determine check frequency automatically based on maximum admissible velocity and block sizes
+      auto uMax = real_t(0.1);
+      refinementCheckFrequency = uint_c(( overlap + real_c(blockSize) - real_t(2) * real_t(FieldGhostLayers) * dx) / uMax) / lbmTimeStepsPerTimeLoopIteration;
+   }
+   WALBERLA_LOG_INFO_ON_ROOT(" - refinement / load balancing check frequency (coarse time steps): " << refinementCheckFrequency);
+   WALBERLA_LOG_INFO_ON_ROOT(" - load evaluation strategy: " << loadEvaluationStrategy);
+   WALBERLA_LOG_INFO_ON_ROOT(" - load distribution strategy: " << loadDistributionStrategy);
+
+   ///////////////////////////
+   // BLOCK STRUCTURE SETUP //
+   ///////////////////////////
+
+   Vector3<uint_t> blockSizeInCells( blockSize );
+
+   AABB simulationDomain( real_t(0), real_t(0), real_t(0), real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) );
+   AABB sedimentDomain( real_t(0), real_t(0), real_c(domainSize[2]) - expectedSedimentedHeight, real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) );
+
+   AABB initialRefinementDomain = sedimentDomain;
+   if( useBox || useHopper )
+   {
+      // require finest levels also along bounding planes -> initially refine everywhere
+      initialRefinementDomain = simulationDomain;
+   }
+
+   auto blocks = createBlockStructure( simulationDomain, blockSizeInCells, numberOfLevels, initialRefinementDomain, (useBox||useHopper), loadDistributionStrategy );
+
+   //write initial domain decomposition to file
+   if( vtkWriteFreqDD > 0 )
+   {
+      vtk::writeDomainDecomposition( blocks, "initial_domain_decomposition", baseFolder );
+   }
+
+
+   /////////////////
+   // PE COUPLING //
+   /////////////////
+
+   // set up pe functionality
+   shared_ptr<pe::BodyStorage> globalBodyStorage = make_shared<pe::BodyStorage>();
+   pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
+
+   auto bodyStorageID  = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage");
+   auto ccdID          = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD");
+   BlockDataID fcdID   = (useEllipsoids) ? blocks->addBlockData( pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::GJKEPACollideFunctor>(), "FCD" )
+                                         : blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD");
+
+   WcTimingTree timingTreePE;
+
+   // set up collision response
+   pe::cr::HCSITS cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID, &timingTreePE );
+   cr.setMaxIterations(10);
+   cr.setRelaxationModel( pe::cr::HardContactSemiImplicitTimesteppingSolvers::ApproximateInelasticCoulombContactByDecoupling );
+
+   // set up synchronization procedure
+   std::function<void(void)> syncCall = std::bind( pe::syncNextNeighbors<BodyTypeTuple>, std::ref(blocks->getBlockForest()), bodyStorageID, &timingTreePE, overlap, false );
+
+   // create pe bodies
+
+   // add the sediments
+   auto peMaterial = pe::createMaterial( "mat", densityRatio, real_t(1), real_t(0.25), real_t(0.25), real_t(0), real_t(200), real_t(100), real_t(100), real_t(100) );
+
+   // create two planes at bottom and top of domain for a horizontally periodic box
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), peMaterial );
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,simulationDomain.zMax()), peMaterial );
+   if( useBox )
+   {
+      // add four more planes to obtain a closed box
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(1,0,0), Vector3<real_t>(0,0,0), peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-1,0,0), Vector3<real_t>(simulationDomain.xMax(),0,0), peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,1,0), Vector3<real_t>(0,0,0), peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-1,0), Vector3<real_t>(0,simulationDomain.yMax(),0), peMaterial );
+   }
+   else if ( useHopper )
+   {
+      // box bounding planes
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(1,0,0), Vector3<real_t>(0,0,0), peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-1,0,0), Vector3<real_t>(simulationDomain.xMax(),0,0), peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,1,0), Vector3<real_t>(0,0,0), peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-1,0), Vector3<real_t>(0,simulationDomain.yMax(),0), peMaterial );
+
+      //hopper planes
+      real_t xMax = simulationDomain.xMax();
+      real_t yMax = simulationDomain.yMax();
+      real_t zMax = simulationDomain.zMax();
+      Vector3<real_t> p1(0,0,hopperRelHeight*zMax);
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(p1[2],0,hopperRelOpening*xMax-p1[0]), p1, peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,p1[2],hopperRelOpening*yMax-p1[0]), p1, peMaterial );
+
+      Vector3<real_t> p2(xMax,yMax,hopperRelHeight*zMax);
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-p2[2],0,-((real_t(1)-hopperRelOpening)*xMax-p2[0])), p2, peMaterial );
+      pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-p2[2],-((real_t(1)-hopperRelOpening)*yMax-p2[1])), p2, peMaterial );
+   }
+
+   AABB sedimentGenerationDomain( real_t(0), real_t(0), real_t(0.5)*real_c(domainSize[2]), real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) );
+   createSedimentLayer(numberOfSediments, sedimentGenerationDomain, diameter, sedimentDomain.zMin(), peMaterial, cr, syncCall, blocks, globalBodyStorage, bodyStorageID, gravitationalAcceleration, useEllipsoids, shortRun );
+
+   // reset timer to not cover init stats
+   timingTreePE.reset();
+
+   // now we can use the information about the body positions to adapt the refinement
+
+   ///////////////////////////
+   // DYNAMIC REFINEMENT, 1 //
+   ///////////////////////////
+
+   auto & blockforest = blocks->getBlockForest();
+   blockforest.recalculateBlockLevelsInRefresh( true );
+   blockforest.alwaysRebalanceInRefresh( true ); //load balancing every time refresh is triggered
+   blockforest.reevaluateMinTargetLevelsAfterForcedRefinement( false );
+   blockforest.allowRefreshChangingDepth( false );
+   blockforest.allowMultipleRefreshCycles( false ); // otherwise info collections are invalid
+
+   {
+      blockforest::CombinedMinTargetLevelDeterminationFunctions initialMinTargetLevelDeterminationFunctions;
+
+      blockforest::AABBRefinementSelection aabbRefinementSelection;
+      aabbRefinementSelection.addAABB(sedimentDomain,finestLevel );
+      initialMinTargetLevelDeterminationFunctions.add( aabbRefinementSelection );
+
+      // refinement along global bodies (bounding planes) to have consistent mapping (required for CLI always, or SimpleBB with non-AABB planes)
+      real_t blockExtension = real_c(FieldGhostLayers);
+      pe_coupling::amr::GlobalBodyPresenceLevelDetermination globalBodyPresenceRefinement( globalBodyStorage, finestLevel, blockExtension, pe_coupling::selectGlobalBodies );
+      initialMinTargetLevelDeterminationFunctions.add(globalBodyPresenceRefinement);
+
+      blockforest.setRefreshMinTargetLevelDeterminationFunction( initialMinTargetLevelDeterminationFunctions );
+
+      for ( auto refreshCycle = uint_t(0); refreshCycle < finestLevel; ++refreshCycle)
+      {
+
+         WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...")
+
+         // check refinement criterions and refine/coarsen if necessary
+         uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
+         blocks->refresh();
+         uint_t stampAfter = blocks->getBlockForest().getModificationStamp();
+
+         if( stampBefore == stampAfter )
+         {
+            break;
+         }
+
+         WALBERLA_LOG_INFO_ON_ROOT("Recreating data structures..");
+
+         // rebuild PE data structures
+         pe::clearSynchronization( blockforest, bodyStorageID);
+
+         syncCall();
+
+         for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+         {
+            auto * ccd = blockIt->getData< pe::ccd::ICCD >( ccdID );
+            ccd->reloadBodies();
+         }
+      }
+   }
+
+   uint_t numberOfInitialFineBlocks = blockforest.getNumberOfBlocks(finestLevel);
+   mpi::allReduceInplace(numberOfInitialFineBlocks, mpi::SUM);
+   WALBERLA_LOG_INFO_ON_ROOT("Total number of initial fine blocks in simulation: " << numberOfInitialFineBlocks);
+
+   uint_t numberOfProcesses = uint_c(MPIManager::instance()->numProcesses());
+
+
+   ///////////////////////
+   // ADD DATA TO BLOCKS //
+   ////////////////////////
+
+   // create the lattice model
+   LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega, lbm::collision_model::TRT::threeSixteenth, finestLevel ) );
+
+   // add PDF field
+   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel,
+                                                                         Vector3< real_t >( real_t(0) ), real_t(1),
+                                                                         FieldGhostLayers, field::zyxf );
+   // add flag field
+   BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field", FieldGhostLayers );
+
+   // add body field
+   BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", nullptr, field::zyxf, FieldGhostLayers );
+
+   // add velocity field and utility
+   BlockDataID velocityFieldID = field::addToStorage<VelocityField_T>( blocks, "velocity field", Vector3<real_t>(real_t(0)), field::zyxf, uint_t(2) );
+
+   typedef lbm::VelocityFieldWriter< PdfField_T, VelocityField_T > VelocityFieldWriter_T;
+   BlockSweepWrapper< VelocityFieldWriter_T > velocityFieldWriter( blocks, VelocityFieldWriter_T( pdfFieldID, velocityFieldID ) );
+
+
+   shared_ptr<blockforest::communication::NonUniformBufferedScheme<stencil::D3Q27> > velocityCommunicationScheme = make_shared<blockforest::communication::NonUniformBufferedScheme<stencil::D3Q27> >( blocks );
+   velocityCommunicationScheme->addPackInfo( make_shared< field::refinement::PackInfo<VelocityField_T, stencil::D3Q27> >( velocityFieldID ) );
+
+   // add boundary handling & initialize outer domain boundaries
+   BlockDataID boundaryHandlingID = blocks->addBlockData( make_shared< MyBoundaryHandling >( blocks, flagFieldID, pdfFieldID, bodyFieldID ),
+                                                          "boundary handling" );
+
+   // map planes into the LBM simulation -> act as no-slip boundaries
+   //pe_coupling::mapBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, NoSlip_Flag, pe_coupling::selectGlobalBodies );
+   pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, bodyFieldID, MO_Flag, pe_coupling::selectGlobalBodies );
+
+   // map pe bodies into the LBM simulation
+   pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, bodyFieldID, MO_Flag, pe_coupling::selectRegularBodies );
+
+
+   // force averaging functionality
+   shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer1 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID);
+   std::function<void(void)> storeForceTorqueInCont1 = std::bind(&pe_coupling::BodiesForceTorqueContainer::store, bodiesFTContainer1);
+
+   shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer2 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID);
+   std::function<void(void)> setForceTorqueOnBodiesFromCont2 = std::bind(&pe_coupling::BodiesForceTorqueContainer::setOnBodies, bodiesFTContainer2);
+
+   shared_ptr<pe_coupling::ForceTorqueOnBodiesScaler> forceScaler = make_shared<pe_coupling::ForceTorqueOnBodiesScaler>(blocks, bodyStorageID, real_t(0.5));
+   std::function<void(void)> setForceScalingFactorToOne = std::bind(&pe_coupling::ForceTorqueOnBodiesScaler::resetScalingFactor,forceScaler,real_t(1));
+   std::function<void(void)> setForceScalingFactorToHalf = std::bind(&pe_coupling::ForceTorqueOnBodiesScaler::resetScalingFactor,forceScaler,real_t(0.5));
+
+   if( averageForceTorqueOverTwoTimSteps ) {
+      bodiesFTContainer2->store();
+
+      setForceScalingFactorToOne();
+   }
+
+   ///////////////////////////
+   // DYNAMIC REFINEMENT, 2 //
+   ///////////////////////////
+
+   blockforest::CombinedMinTargetLevelDeterminationFunctions minTargetLevelDeterminationFunctions;
+
+   // add refinement criterion based on particle presence
+   shared_ptr<pe_coupling::InfoCollection> couplingInfoCollection = walberla::make_shared<pe_coupling::InfoCollection>();
+   pe_coupling::amr::BodyPresenceLevelDetermination particlePresenceRefinement( couplingInfoCollection, finestLevel );
+
+   minTargetLevelDeterminationFunctions.add( particlePresenceRefinement );
+
+   // also add (possible) refinement criteria based on fluid quantities
+
+   if( useVorticityCriterion )
+   {
+      // add refinement criterion based on vorticity magnitude
+      field::FlagFieldEvaluationFilter<FlagField_T> flagFieldFilter( flagFieldID, Fluid_Flag );
+      lbm::refinement::VorticityBasedLevelDetermination< field::FlagFieldEvaluationFilter<FlagField_T> > vorticityRefinement(
+            velocityFieldID, flagFieldFilter, upperFluidRefinementLimit, lowerFluidRefinementLimit, finestLevel );
+
+      minTargetLevelDeterminationFunctions.add( vorticityRefinement );
+   }
+
+   if( useGradientCriterion )
+   {
+      // add refinement criterion based on velocity gradient magnitude
+      field::FlagFieldEvaluationFilter<FlagField_T> flagFieldFilter( flagFieldID, Fluid_Flag );
+      VectorGradientRefinement< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> > gradientRefinement(
+            velocityFieldID, flagFieldFilter, upperFluidRefinementLimit, lowerFluidRefinementLimit, finestLevel );
+
+      minTargetLevelDeterminationFunctions.add( gradientRefinement );
+   }
+
+   // refinement along global bodies (bounding planes) to have consistent mapping (required for CLI always, or SimpleBB with non-AABB planes)
+   real_t blockExtension = real_c(FieldGhostLayers);
+   pe_coupling::amr::GlobalBodyPresenceLevelDetermination globalBodyPresenceRefinement( globalBodyStorage, finestLevel, blockExtension, pe_coupling::selectGlobalBodies );
+   minTargetLevelDeterminationFunctions.add(globalBodyPresenceRefinement);
+
+   blockforest.setRefreshMinTargetLevelDeterminationFunction( minTargetLevelDeterminationFunctions );
+
+   bool curveAllGather = true;
+   bool balanceLevelwise = true;
+
+   auto peBlockBaseWeight = real_t(1); //default value, might not be the best
+   shared_ptr<pe::InfoCollection> peInfoCollection = walberla::make_shared<pe::InfoCollection>();
+
+   if( loadDistributionStrategy == "Hilbert" || loadDistributionStrategy == "Morton")
+   {
+      if( loadDistributionStrategy == "Hilbert")
+      {
+         bool useHilbert = true;
+         blockforest.setRefreshPhantomBlockMigrationPreparationFunction( blockforest::DynamicCurveBalance< blockforest::PODPhantomWeight<real_t> >( useHilbert, curveAllGather, balanceLevelwise ) );
+      }
+      else if (loadDistributionStrategy == "Morton" )
+      {
+         bool useHilbert = false;
+         blockforest.setRefreshPhantomBlockMigrationPreparationFunction( blockforest::DynamicCurveBalance< blockforest::PODPhantomWeight<real_t> >( useHilbert, curveAllGather, balanceLevelwise ) );
+      }
+
+      blockforest.setRefreshPhantomBlockDataPackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>());
+      blockforest.setRefreshPhantomBlockDataUnpackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>());
+
+      if( loadEvaluationStrategy == "Fit" )
+      {
+         if( useEllipsoids )
+         {
+            pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionEllipsoids);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         } else{
+            pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionSpheres);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         }
+      }
+      else if( loadEvaluationStrategy == "PE" )
+      {
+         pe::amr::WeightAssignmentFunctor weightAssignmentFunctor(peInfoCollection, peBlockBaseWeight );
+         blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+      }
+      else if( loadEvaluationStrategy == "LBM" )
+      {
+         pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, pe_coupling::amr::defaultWeightEvaluationFunction);
+         blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+      }
+      else
+      {
+         WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy);
+      }
+
+   }
+   else if( loadDistributionStrategy == "ParMetis")
+   {
+      uint_t ncon = 1;
+      if( loadEvaluationStrategy == "FitMulti")
+      {
+         ncon = 2;
+      }
+
+      blockforest::DynamicParMetis::Algorithm parMetisAlgorithm = blockforest::DynamicParMetis::stringToAlgorithm(parMetisAlgorithmString);
+      blockforest::DynamicParMetis::WeightsToUse parMetisWeightsToUse = blockforest::DynamicParMetis::WeightsToUse::PARMETIS_BOTH_WEIGHTS;
+      blockforest::DynamicParMetis::EdgeSource parMetisEdgeSource = blockforest::DynamicParMetis::EdgeSource::PARMETIS_EDGES_FROM_EDGE_WEIGHTS;
+
+      blockforest::DynamicParMetis dynamicParMetis(parMetisAlgorithm, parMetisWeightsToUse, parMetisEdgeSource, ncon);
+      dynamicParMetis.setipc2redist(parMetis_ipc2redist);
+
+      real_t loadImbalanceTolerance = (parMetisTolerance < real_t(1)) ? std::max(real_t(1.05), real_t(1) + real_t(1) / ( real_c(numberOfInitialFineBlocks) / real_c(numberOfProcesses) ) ) : parMetisTolerance;
+      std::vector<double> parMetisLoadImbalanceTolerance(ncon, double(loadImbalanceTolerance));
+      dynamicParMetis.setImbalanceTolerance(parMetisLoadImbalanceTolerance[0], 0);
+
+      WALBERLA_LOG_INFO_ON_ROOT(" - ParMetis configuration: ");
+      WALBERLA_LOG_INFO_ON_ROOT("   - algorithm = " << dynamicParMetis.algorithmToString() );
+      WALBERLA_LOG_INFO_ON_ROOT("   - weights to use = " << dynamicParMetis.weightsToUseToString() );
+      WALBERLA_LOG_INFO_ON_ROOT("   - edge source = " << dynamicParMetis.edgeSourceToString() );
+      WALBERLA_LOG_INFO_ON_ROOT("   - ncon = " << ncon );
+      WALBERLA_LOG_INFO_ON_ROOT("   - ipc2redist parameter = " << dynamicParMetis.getipc2redist() );
+
+      blockforest.setRefreshPhantomBlockDataPackFunction(blockforest::DynamicParMetisBlockInfoPackUnpack());
+      blockforest.setRefreshPhantomBlockDataUnpackFunction(blockforest::DynamicParMetisBlockInfoPackUnpack());
+
+      if( loadEvaluationStrategy == "Fit" )
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("   - load imbalance tolerance = <" << parMetisLoadImbalanceTolerance[0] << ">" );
+         if( useEllipsoids )
+         {
+            pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionEllipsoids);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         } else{
+            pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionSpheres);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         }
+      }
+      else if( loadEvaluationStrategy == "FitMulti" )
+      {
+         double imbalanceTolerancePE = 10.;
+         parMetisLoadImbalanceTolerance[1] = std::min(imbalanceTolerancePE, real_c(MPIManager::instance()->numProcesses()));
+         WALBERLA_LOG_INFO_ON_ROOT("   - load imbalance tolerances = <" << parMetisLoadImbalanceTolerance[0] << ", " << parMetisLoadImbalanceTolerance[1] << ">" );
+         dynamicParMetis.setImbalanceTolerance(parMetisLoadImbalanceTolerance[1], 1);
+
+         if( useEllipsoids )
+         {
+            std::vector< std::function<real_t(const pe_coupling::BlockInfo&)> > weightEvaluationFunctions(ncon);
+            weightEvaluationFunctions[0] = fittedLBMWeightEvaluationFunctionEllipsoids;
+            weightEvaluationFunctions[1] = fittedPEWeightEvaluationFunctionEllipsoids;
+            pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, weightEvaluationFunctions);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         } else{
+            std::vector< std::function<real_t(const pe_coupling::BlockInfo&)> > weightEvaluationFunctions(ncon);
+            weightEvaluationFunctions[0] = fittedLBMWeightEvaluationFunctionSpheres;
+            weightEvaluationFunctions[1] = fittedPEWeightEvaluationFunctionSpheres;
+            pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, weightEvaluationFunctions);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         }
+      }
+      else if( loadEvaluationStrategy == "PE" )
+      {
+         pe::amr::MetisAssignmentFunctor weightAssignmentFunctor(peInfoCollection, peBlockBaseWeight );
+         blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+      }
+      else if( loadEvaluationStrategy == "LBM" )
+      {
+         pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, pe_coupling::amr::defaultWeightEvaluationFunction);
+         blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+      }
+      else
+      {
+         WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy);
+      }
+
+      blockforest.setRefreshPhantomBlockMigrationPreparationFunction( dynamicParMetis );
+
+   }
+   else if( loadDistributionStrategy == "Diffusive")
+   {
+      using DB_T = blockforest::DynamicDiffusionBalance< blockforest::PODPhantomWeight<real_t> >;
+      DB_T dynamicDiffusion(diffusionMaxIterations, diffusionFlowIterations );
+      dynamicDiffusion.setMode(DB_T::Mode::DIFFUSION_PUSH);
+
+      WALBERLA_LOG_INFO_ON_ROOT(" - Dynamic diffusion configuration: ");
+      WALBERLA_LOG_INFO_ON_ROOT("   - max iterations = " << dynamicDiffusion.getMaxIterations() );
+      WALBERLA_LOG_INFO_ON_ROOT("   - flow iterations = " << dynamicDiffusion.getFlowIterations());
+
+      blockforest.setRefreshPhantomBlockDataPackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>());
+      blockforest.setRefreshPhantomBlockDataUnpackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>());
+      blockforest.setRefreshPhantomBlockMigrationPreparationFunction( dynamicDiffusion );
+
+      if( loadEvaluationStrategy == "Fit" )
+      {
+         if( useEllipsoids )
+         {
+            pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionEllipsoids);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         } else{
+            pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionSpheres);
+            blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+         }
+      }
+      else if( loadEvaluationStrategy == "PE" )
+      {
+         pe::amr::WeightAssignmentFunctor weightAssignmentFunctor(peInfoCollection, peBlockBaseWeight );
+         blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+      }
+      else if( loadEvaluationStrategy == "LBM" )
+      {
+         pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, pe_coupling::amr::defaultWeightEvaluationFunction);
+         blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor);
+      }
+      else
+      {
+         WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy);
+      }
+
+   } else
+   {
+      WALBERLA_ABORT("Load distribution strategy \"" << loadDistributionStrategy << "\t not implemented! - Aborting" );
+   }
+
+
+   ///////////////
+   // TIME LOOP //
+   ///////////////
+
+   // create the timeloop
+   SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
+
+   if( vtkWriteFreqBo != uint_t(0) ) {
+
+      // pe bodies
+      if (useEllipsoids) {
+         auto bodyVtkOutput = make_shared<pe::EllipsoidVtkOutput>(bodyStorageID, blocks->getBlockStorage());
+         auto bodyVTK = vtk::createVTKOutput_PointData(bodyVtkOutput, "bodies", vtkWriteFreqBo, baseFolder);
+         timeloop.addFuncBeforeTimeStep(vtk::writeFiles(bodyVTK), "VTK (sediment data)");
+
+      } else {
+         auto bodyVtkOutput = make_shared<pe::SphereVtkOutput>(bodyStorageID, blocks->getBlockStorage());
+         auto bodyVTK = vtk::createVTKOutput_PointData(bodyVtkOutput, "bodies", vtkWriteFreqBo, baseFolder);
+         timeloop.addFuncBeforeTimeStep(vtk::writeFiles(bodyVTK), "VTK (sediment data)");
+      }
+   }
+
+   if( vtkWriteFreqFl != uint_t(0) ) {
+
+      // pdf field
+      auto pdfFieldVTK = vtk::createVTKOutput_BlockData(blocks, "fluid_field", vtkWriteFreqFl, 0, false, baseFolder);
+
+      field::FlagFieldCellFilter<FlagField_T> fluidFilter(flagFieldID);
+      fluidFilter.addFlag(Fluid_Flag);
+      pdfFieldVTK->addCellInclusionFilter(fluidFilter);
+
+      pdfFieldVTK->addCellDataWriter(
+            make_shared<lbm::VelocityVTKWriter<LatticeModel_T, float> >(pdfFieldID, "VelocityFromPDF"));
+      pdfFieldVTK->addCellDataWriter(
+            make_shared<lbm::DensityVTKWriter<LatticeModel_T, float> >(pdfFieldID, "DensityFromPDF"));
+
+      timeloop.addFuncBeforeTimeStep(vtk::writeFiles(pdfFieldVTK), "VTK (fluid field data)");
+   }
+
+   if( vtkWriteFreqDD != uint_t(0) ) {
+      auto domainDecompVTK = vtk::createVTKOutput_DomainDecomposition(blocks, "domain_decomposition", vtkWriteFreqDD, baseFolder );
+      timeloop.addFuncBeforeTimeStep( vtk::writeFiles(domainDecompVTK), "VTK (domain decomposition)");
+   }
+
+   WcTimingPool timeloopTiming;
+   shared_ptr<WcTimingPool> timeloopRefinementTiming = make_shared<WcTimingPool>();
+   shared_ptr<WcTimingPool> timeloopRefinementTimingLevelwise = make_shared<WcTimingPool>();
+
+
+   auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag );
+   auto refinementTimestep = lbm::refinement::makeTimeStep< LatticeModel_T, BoundaryHandling_T >( blocks, sweep, pdfFieldID, boundaryHandlingID );
+
+   refinementTimestep->enableTiming( timeloopRefinementTiming, timeloopRefinementTimingLevelwise );
+
+   // Averaging the force/torque over two time steps is said to damp oscillations of the interaction force/torque.
+   // See Ladd - " Numerical simulations of particulate suspensions via a discretized Boltzmann equation. Part 1. Theoretical foundation", 1994, p. 302
+   if( averageForceTorqueOverTwoTimSteps ) {
+
+      // store force/torque from hydrodynamic interactions in container1
+      refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(storeForceTorqueInCont1), "Force Storing", finestLevel);
+
+      // set force/torque from previous time step (in container2)
+      refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(setForceTorqueOnBodiesFromCont2), "Force setting", finestLevel);
+
+      // average the force/torque by scaling it with factor 1/2 (except in first timestep and directly after refinement, there it is 1)
+      refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(SharedFunctor<pe_coupling::ForceTorqueOnBodiesScaler>(forceScaler)), "Force averaging", finestLevel);
+      refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(setForceScalingFactorToHalf), "Force scaling adjustment", finestLevel);
+
+      // swap containers
+      refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(pe_coupling::BodyContainerSwapper(bodiesFTContainer1, bodiesFTContainer2)), "Swap FT container", finestLevel);
+
+   }
+
+   Vector3<real_t> gravitationalForce( real_t(0), real_t(0), -(densityRatio - real_t(1)) * gravitationalAcceleration * sphereVolume );
+   refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(pe_coupling::ForceOnBodiesAdder( blocks, bodyStorageID, gravitationalForce )), "Gravitational force", finestLevel );
+
+   // add pe timesteps
+   refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(pe_coupling::TimeStep( blocks, bodyStorageID, cr, syncCall, real_t(1), numPeSubCycles)),
+                                                  "pe Time Step", finestLevel );
+
+   // add sweep for updating the pe body mapping into the LBM simulation
+   refinementTimestep->addPostStreamVoidFunction(lbm::refinement::SweepAsFunctorWrapper( pe_coupling::BodyMapping< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID,  MO_Flag, FormerMO_Flag, pe_coupling::selectRegularBodies ), blocks ),
+                                                 "Body Mapping", finestLevel );
+
+   // add sweep for restoring PDFs in cells previously occupied by pe bodies
+   typedef pe_coupling::EquilibriumReconstructor< LatticeModel_T, BoundaryHandling_T > Reconstructor_T;
+   Reconstructor_T reconstructor( blocks, boundaryHandlingID, pdfFieldID, bodyFieldID );
+   refinementTimestep->addPostStreamVoidFunction(lbm::refinement::SweepAsFunctorWrapper( pe_coupling::PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructor_T > ( blocks,
+                                                 boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, reconstructor, FormerMO_Flag, Fluid_Flag ), blocks ),
+                                                 "PDF Restore", finestLevel );
+
+
+   // add LBM sweep with refinement
+   timeloop.addFuncBeforeTimeStep( makeSharedFunctor( refinementTimestep ), "LBM refinement time step" );
+
+   std::string loggingFileName( baseFolder + "/Logging_Ga");
+   loggingFileName += std::to_string(uint_c(GalileoNumber));
+   loggingFileName += "_lvl";
+   loggingFileName += std::to_string(numberOfLevels);
+   loggingFileName += ".txt";
+   if( logging  )
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(" - writing logging output to file \"" << loggingFileName << "\"");
+   }
+   shared_ptr< PropertyLogger > logger = walberla::make_shared< PropertyLogger >( &timeloop, blocks, bodyStorageID,
+                                                                                  loggingFileName, fileIO );
+   if(logging)
+   {
+      timeloop.addFuncAfterTimeStep( SharedFunctor< PropertyLogger >( logger ), "Property logger" );
+   }
+
+
+   timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" );
+
+
+   // add top level timing pool output
+   timeloop.addFuncAfterTimeStep( TimingPoolLogger( timeloopTiming, timeloop, loggingDisplayFrequency ), "Regular Timing Logger" );
+
+   // add regular refinement timing pool output
+   timeloop.addFuncAfterTimeStep( TimingPoolLogger( *timeloopRefinementTiming, timeloop, loggingDisplayFrequency ), "Refinement Timing Logger" );
+
+   // add level wise timing pool output
+   //if( numberOfLevels != uint_t(1))
+      timeloop.addFuncAfterTimeStep( TimingPoolLogger( *timeloopRefinementTimingLevelwise, timeloop, loggingDisplayFrequency ), "Refinement Levelwise Timing Logger" );
+
+   // add PE timing tree output
+   timeloop.addFuncAfterTimeStep( TimingTreeLogger( timingTreePE, timeloop, loggingDisplayFrequency ), "PE Timing Tree Timing Logger" );
+
+
+   ////////////////////////
+   // EXECUTE SIMULATION //
+   ////////////////////////
+
+   uint_t loadEvaluationFrequency = refinementCheckFrequency;
+   TimingEvaluator timingEvaluator( *timeloopRefinementTimingLevelwise, timingTreePE, numberOfLevels );
+
+   // file for simulation infos
+   std::string infoFileName( baseFolder + "/simulation_info.txt");
+   WALBERLA_ROOT_SECTION()
+   {
+      std::ofstream file;
+      file.open( infoFileName.c_str(), std::fstream::out | std::fstream::trunc );
+      file << "#i\t t\t tSim\t tLB\t numProcs\t levelwise blocks (min/max/sum)\n";
+      file.close();
+   }
+
+   // process local timing measurements and predicted loads
+   std::string processLocalFiles(baseFolder + "/processLocalFiles");
+   WALBERLA_ROOT_SECTION()
+   {
+      filesystem::path tpath( processLocalFiles );
+      if( !filesystem::exists( tpath ) )
+         filesystem::create_directory( tpath );
+   }
+   std::string measurementFileProcessName(processLocalFiles + "/measurements_" + std::to_string(MPIManager::instance()->rank()) + ".txt");
+   {
+      std::ofstream file;
+      file.open( measurementFileProcessName.c_str(), std::fstream::out | std::fstream::trunc );
+      file << "#i\t t\t mTotSim\t mLB\t mLBM\t mBH\t mCoup1\t mCoup2\t mRB\t cLBM\t cRB\t numBlocks\n";
+      file.close();
+   }
+
+   std::string predictionFileProcessName(processLocalFiles + "/predictions_" + std::to_string(MPIManager::instance()->rank()) + ".txt");
+   {
+      std::ofstream file;
+      file.open( predictionFileProcessName.c_str(), std::fstream::out | std::fstream::trunc );
+      file << "#i\t t\t wlLBM\t wlBH\t wlCoup1\t wlCoup2\t wlRB\t edgecut\t numBlocks\n";
+      file.close();
+   }
+
+   std::vector<std::string> LBMTimer;
+   LBMTimer.emplace_back("collide");
+   LBMTimer.emplace_back("stream");
+   LBMTimer.emplace_back("stream & collide");
+
+   std::vector<std::string> bhTimer;
+   bhTimer.emplace_back("boundary handling");
+
+   std::vector<std::string> couplingTimer1;
+   couplingTimer1.emplace_back("Body Mapping");
+   std::vector<std::string> couplingTimer2;
+   couplingTimer2.emplace_back("PDF Restore");
+
+   std::vector<std::string> peTimer;
+   peTimer.emplace_back("Simulation Step.Collision Detection");
+   peTimer.emplace_back("Simulation Step.Collision Response Integration");
+   peTimer.emplace_back("Simulation Step.Collision Response Resolution.Collision Response Solving");
+
+   std::vector<std::string> LBMCommTimer;
+   LBMCommTimer.emplace_back("communication equal level [pack & send]");
+   LBMCommTimer.emplace_back("communication equal level [wait & unpack]");
+
+   std::vector<std::string> peCommTimer;
+   //Adapt if using different collision response (like DEM!)
+   peCommTimer.emplace_back("Simulation Step.Collision Response Resolution.Velocity Sync");
+   peCommTimer.emplace_back("Sync");
+
+
+   real_t terminationPosition = expectedSedimentedHeight;
+   real_t terminationVelocity = real_t(0.05) * uRef;
+
+   auto oldmTotSim = real_t(0);
+   auto oldmLB = real_t(0);
+
+   auto measurementFileCounter = uint_t(0);
+   auto predictionFileCounter = uint_t(0);
+
+   std::string loadEvaluationStep("load evaluation");
+
+   // time loop
+   for (uint_t i = 0; i < timesteps; ++i )
+   {
+
+      // evaluate measurements (note: reflect simulation behavior BEFORE the evaluation)
+      if( loadEvaluationFrequency > 0 && i % loadEvaluationFrequency == 0 && i > 0 && fileIO)
+      {
+
+         timeloopTiming[loadEvaluationStep].start();
+
+         // write process local timing measurements to files (per process, per load balancing step)
+         {
+
+            // evaluate all required timers
+            uint_t evalLevel = finestLevel;
+
+            real_t mTotSim = ( timeloopTiming.timerExists("LBM refinement time step") ) ? timeloopTiming["LBM refinement time step"].total() : real_t(0);
+
+            real_t mLB = ( timeloopTiming.timerExists("refinement checking") ) ? timeloopTiming["refinement checking"].total() : real_t(0);
+
+            real_t mLBM = timingEvaluator.getTimings(LBMTimer, evalLevel);
+            real_t mBH  = timingEvaluator.getTimings(bhTimer, evalLevel);
+            real_t mCoup1 = timingEvaluator.getTimings(couplingTimer1, evalLevel);
+            real_t mCoup2 = timingEvaluator.getTimings(couplingTimer2, evalLevel);
+            real_t mPE = timingEvaluator.getTimings(peTimer, evalLevel);
+
+            real_t cLBM = timingEvaluator.getTimings(LBMCommTimer, evalLevel);
+            real_t cRB = timingEvaluator.getTimings(peCommTimer, evalLevel);
+
+            auto & forest = blocks->getBlockForest();
+            uint_t numBlocks = forest.getNumberOfBlocks(finestLevel);
+
+            // write to process local file
+            std::ofstream file;
+            file.open( measurementFileProcessName.c_str(), std::ofstream::app  );
+            file << measurementFileCounter << "\t " << real_c(i) / tRef << "\t"
+                 << mTotSim - oldmTotSim << "\t" << mLB - oldmLB << "\t" << mLBM << "\t" << mBH << "\t" << mCoup1 << "\t"
+                 << mCoup2 << "\t" << mPE << "\t" << cLBM << "\t" << cRB << "\t" << numBlocks << "\n";
+            file.close();
+
+            oldmTotSim = mTotSim;
+            oldmLB = mLB;
+            measurementFileCounter++;
+
+            // reset timers to have measurement from evaluation to evaluation point
+            timeloopRefinementTimingLevelwise->clear();
+            timingTreePE.reset();
+
+         }
+
+         // evaluate general simulation infos (on root)
+         {
+            real_t totalTimeToCurrentTimestep, totalLBTimeToCurrentTimestep;
+            evaluateTotalSimulationTimePassed(timeloopTiming, totalTimeToCurrentTimestep, totalLBTimeToCurrentTimestep);
+            std::vector<math::DistributedSample> numberOfBlocksPerLevel(numberOfLevels);
+
+            auto & forest = blocks->getBlockForest();
+            for( uint_t lvl = 0; lvl < numberOfLevels; ++lvl)
+            {
+               uint_t numBlocks = forest.getNumberOfBlocks(lvl);
+               numberOfBlocksPerLevel[lvl].castToRealAndInsert(numBlocks);
+            }
+
+            for( uint_t lvl = 0; lvl < numberOfLevels; ++lvl)
+            {
+               numberOfBlocksPerLevel[lvl].mpiGatherRoot();
+            }
+
+            WALBERLA_ROOT_SECTION()
+            {
+               std::ofstream file;
+               file.open( infoFileName.c_str(), std::ofstream::app  );
+               file << i << "\t " << real_c(i) / tRef << "\t"
+                    << totalTimeToCurrentTimestep << "\t " << totalLBTimeToCurrentTimestep << "\t " << numberOfProcesses << "\t ";
+
+               for( uint_t lvl = 0; lvl < numberOfLevels; ++lvl)
+               {
+                  file << uint_c(numberOfBlocksPerLevel[numberOfLevels-1-lvl].min()) << "\t ";
+                  file << uint_c(numberOfBlocksPerLevel[numberOfLevels-1-lvl].max()) << "\t ";
+                  file << uint_c(numberOfBlocksPerLevel[numberOfLevels-1-lvl].sum()) << "\t ";
+               }
+               file << "\n";
+
+               file.close();
+            }
+         }
+
+         timeloopTiming[loadEvaluationStep].end();
+
+      }
+
+
+      if( refinementCheckFrequency != 0 && i % refinementCheckFrequency == 0)
+      {
+
+         WALBERLA_LOG_INFO_ON_ROOT("Checking for refinement and load balancing...")
+
+         std::string refinementCheckStep("refinement checking");
+         timeloopTiming[refinementCheckStep].start();
+
+         if( loadEvaluationStrategy != "LBM" ) {
+
+            // first evaluate all data that is required for the refinement checks
+
+            // update info collections for the particle presence based check and the load balancing:
+            auto &forest = blocks->getBlockForest();
+            pe_coupling::createWithNeighborhood<BoundaryHandling_T>(forest, boundaryHandlingID, bodyStorageID, ccdID,
+                                                                    fcdID, numPeSubCycles, *couplingInfoCollection);
+            pe::createWithNeighborhood(forest, bodyStorageID, *peInfoCollection);
+
+            // for the fluid property based check:
+            if (useVorticityCriterion || useGradientCriterion) {
+               velocityFieldWriter();
+               (*velocityCommunicationScheme)();
+            }
+
+            WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...")
+
+            // check refinement criterions and refine/coarsen if necessary
+            uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
+            blocks->refresh();
+            uint_t stampAfter = blocks->getBlockForest().getModificationStamp();
+
+            bool recreatingNecessary = false;
+
+            if (stampBefore != stampAfter) {
+               recreatingNecessary = true;
+            }
+
+            bool reducedRecreationFlag = mpi::allReduce(recreatingNecessary, mpi::LOGICAL_OR);
+
+            if (reducedRecreationFlag != recreatingNecessary) {
+               WALBERLA_LOG_INFO("Reduced recreation flag different from individual one");
+            }
+
+            recreatingNecessary = reducedRecreationFlag;
+
+            if (recreatingNecessary) {
+
+               WALBERLA_LOG_INFO_ON_ROOT("Recreating data structures..");
+
+               // rebuild PE data structures
+               pe::clearSynchronization(blockforest, bodyStorageID);
+
+               syncCall();
+
+               for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
+                  auto * ccd = blockIt->getData<pe::ccd::ICCD>(ccdID);
+                  ccd->reloadBodies();
+               }
+
+               clearBoundaryHandling(forest, boundaryHandlingID);
+               clearBodyField(forest, bodyFieldID);
+
+               if (averageForceTorqueOverTwoTimSteps) {
+
+                  // clear containers from old values
+                  bodiesFTContainer1->clear();
+                  bodiesFTContainer2->clear();
+
+                  // initialize FT container on all blocks anew, i.e. with the currently acting force/torque, which is zero after the refinement step
+                  bodiesFTContainer2->store();
+
+                  // set force scaling factor to one after refinement since force history is not present on blocks after refinement
+                  // thus the usual averaging of 1/2 (over two time steps) can not be carried out, i.e. it would lead to 1/2 of the acting force
+                  // the scaling factor is thus adapted for the next timestep to 1, and then changed back to 1/2 (in the timeloop)
+                  setForceScalingFactorToOne();
+               }
+
+               recreateBoundaryHandling(forest, boundaryHandlingID);
+
+               // re-set the no-slip flags along the walls
+               pe_coupling::mapMovingBodies<BoundaryHandling_T>(*blocks, boundaryHandlingID, bodyStorageID,
+                                                                *globalBodyStorage, bodyFieldID, MO_Flag,
+                                                                pe_coupling::selectGlobalBodies);
+
+               // re-map the body into the domain (initializing the bodyField as well)
+               pe_coupling::mapMovingBodies<BoundaryHandling_T>(*blocks, boundaryHandlingID, bodyStorageID,
+                                                                *globalBodyStorage, bodyFieldID, MO_Flag,
+                                                                pe_coupling::selectRegularBodies);
+            }
+
+         }
+
+         timeloopTiming[refinementCheckStep].end();
+      }
+
+      // evaluate predictions (note: reflect the predictions for all upcoming simulations, thus the corresponding measurements have to be taken afterwards)
+      if( loadEvaluationFrequency > 0 && i % loadEvaluationFrequency == 0 && fileIO)
+      {
+
+         timeloopTiming[loadEvaluationStep].start();
+
+         // write process local load predictions to files (per process, per load balancing step)
+         {
+
+            auto wlLBM = real_t(0);
+            auto wlBH = real_t(0);
+            auto wlCoup1 = real_t(0);
+            auto wlCoup2 = real_t(0);
+            auto wlRB = real_t(0);
+
+            auto & forest = blocks->getBlockForest();
+            pe_coupling::createWithNeighborhood<BoundaryHandling_T>(forest, boundaryHandlingID, bodyStorageID, ccdID, fcdID, numPeSubCycles, *couplingInfoCollection);
+
+            for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) {
+               auto * block = static_cast<blockforest::Block *> (&(*blockIt));
+               const auto &blockID = block->getId();
+               auto infoIt = couplingInfoCollection->find(blockID);
+               auto blockInfo = infoIt->second;
+
+               if( useEllipsoids )
+               {
+                  WALBERLA_ABORT("Not yet implemented!");
+               }
+               else
+               {
+                  wlLBM   += fittedLBMWeightEvaluationFunctionSpheres(blockInfo);
+                  wlBH    += fittedBHWeightEvaluationFunctionSpheres(blockInfo);
+                  wlCoup1 += fittedCoupling1WeightEvaluationFunctionSpheres(blockInfo);
+                  wlCoup2 += fittedCoupling2WeightEvaluationFunctionSpheres(blockInfo);
+                  wlRB    += fittedPEWeightEvaluationFunctionSpheres(blockInfo);
+               }
+
+            }
+
+            // note: we count the edge weight doubled here in total (to and from the other process). ParMetis only counts one direction.
+            uint_t edgecut = evaluateEdgeCut(forest);
+
+            uint_t numBlocks = forest.getNumberOfBlocks(finestLevel);
+
+            std::ofstream file;
+            file.open( predictionFileProcessName.c_str(), std::ofstream::app  );
+            file << predictionFileCounter << "\t " << real_c(i) / tRef << "\t"
+                 << wlLBM << "\t" << wlBH << "\t" << wlCoup1 << "\t" << wlCoup2 << "\t" << wlRB << "\t"
+                 << edgecut << "\t" << numBlocks << "\n";
+            file.close();
+
+            predictionFileCounter++;;
+         }
+
+         timeloopTiming[loadEvaluationStep].end();
+
+      }
+
+      // perform a single simulation step
+      timeloop.singleStep( timeloopTiming );
+
+
+      if( logging )
+      {
+         real_t curMeanPos = logger->getMeanPosition();
+         real_t curMeanVel = logger->getMeanVelocity();
+         if( curMeanPos <= terminationPosition && std::fabs(curMeanVel) < terminationVelocity )
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Sediments passed terminal mean position " << terminationPosition << " and reached velocity " << curMeanVel << " - terminating simulation!");
+            break;
+         }
+      }
+
+   }
+
+   timeloopTiming.logResultOnRoot();
+
+
+   return EXIT_SUCCESS;
+}
+
+} // namespace amr_sediment_settling
+
+int main( int argc, char **argv ){
+   amr_sediment_settling::main(argc, argv);
+}
diff --git a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5d3db5ef36996a33723f2b203c98bc2986fc6f7f
--- /dev/null
+++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt
@@ -0,0 +1,5 @@
+waLBerla_add_executable( NAME WorkloadEvaluation FILES WorkloadEvaluation.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk )
+
+if( WALBERLA_BUILD_WITH_PARMETIS )
+   waLBerla_add_executable( NAME AMRSedimentSettling FILES AMRSedimentSettling.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk )
+endif()
\ No newline at end of file
diff --git a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a4fef688ee44f1e0f1b4bf56be41f9b637cd2ef
--- /dev/null
+++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp
@@ -0,0 +1,980 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WorkLoadEvaluation.cpp
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "boundary/all.h"
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/debug/Debug.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/logging/Logging.h"
+#include "core/math/all.h"
+#include "core/SharedFunctor.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/mpi/MPIManager.h"
+#include "core/mpi/Reduce.h"
+#include "core/mpi/Broadcast.h"
+
+#include "domain_decomposition/SharedSweep.h"
+
+#include "field/AddToStorage.h"
+#include "field/communication/PackInfo.h"
+
+#include "lbm/boundary/all.h"
+#include "lbm/communication/PdfFieldPackInfo.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/field/MacroscopicValueCalculation.h"
+#include "lbm/field/PdfField.h"
+#include "lbm/lattice_model/D3Q19.h"
+#include "lbm/lattice_model/ForceModel.h"
+#include "lbm/sweeps/CellwiseSweep.h"
+#include "lbm/sweeps/SweepWrappers.h"
+#include "lbm/BlockForestEvaluation.h"
+
+#include "stencil/D3Q27.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "pe/basic.h"
+#include "pe/cr/ICR.h"
+#include "pe/fcd/GJKEPACollideFunctor.h"
+#include "pe/vtk/BodyVtkOutput.h"
+#include "pe/vtk/EllipsoidVtkOutput.h"
+#include "pe/vtk/SphereVtkOutput.h"
+
+#include "pe_coupling/mapping/all.h"
+#include "pe_coupling/momentum_exchange_method/all.h"
+#include "pe_coupling/utility/all.h"
+
+#include "vtk/all.h"
+#include "field/vtk/all.h"
+#include "lbm/vtk/all.h"
+
+#include <vector>
+#include <iomanip>
+#include <iostream>
+
+namespace workload_evaluation
+{
+
+///////////
+// USING //
+///////////
+
+using namespace walberla;
+using walberla::uint_t;
+
+// PDF field, flag field & body field
+typedef lbm::D3Q19< lbm::collision_model::TRT, false>  LatticeModel_T;
+
+typedef LatticeModel_T::Stencil Stencil_T;
+typedef lbm::PdfField< LatticeModel_T > PdfField_T;
+
+typedef walberla::uint8_t                 flag_t;
+typedef FlagField< flag_t >               FlagField_T;
+typedef GhostLayerField< pe::BodyID, 1 >  BodyField_T;
+
+const uint_t FieldGhostLayers = 1;
+
+// boundary handling
+typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MO_CLI_T;
+
+typedef boost::tuples::tuple<MO_CLI_T >               BoundaryConditions_T;
+typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
+
+typedef boost::tuple<pe::Sphere, pe::Ellipsoid, pe::Plane> BodyTypeTuple;
+
+///////////
+// FLAGS //
+///////////
+
+const FlagUID Fluid_Flag   ( "fluid" );
+const FlagUID MO_CLI_Flag  ( "moving obstacle CLI" );
+const FlagUID FormerMO_Flag( "former moving obstacle" );
+
+
+/////////////////////////////////////
+// BOUNDARY HANDLING CUSTOMIZATION //
+/////////////////////////////////////
+
+class MyBoundaryHandling
+{
+public:
+
+   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID, bool useEntireFieldTraversal ) :
+      flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID ), useEntireFieldTraversal_( useEntireFieldTraversal ) {}
+
+   BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const;
+
+private:
+
+   const BlockDataID flagFieldID_;
+   const BlockDataID pdfFieldID_;
+   const BlockDataID bodyFieldID_;
+   bool useEntireFieldTraversal_;
+
+}; // class MyBoundaryHandling
+
+BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const StructuredBlockStorage * const storage ) const
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( block );
+   WALBERLA_ASSERT_NOT_NULLPTR( storage );
+
+   auto * flagField = block->getData< FlagField_T >( flagFieldID_ );
+   auto *  pdfField = block->getData< PdfField_T > ( pdfFieldID_ );
+   auto * bodyField = block->getData< BodyField_T >( bodyFieldID_ );
+
+   const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
+
+   BoundaryHandling_T::Mode mode = (useEntireFieldTraversal_) ? BoundaryHandling_T::Mode::ENTIRE_FIELD_TRAVERSAL : BoundaryHandling_T::Mode::OPTIMIZED_SPARSE_TRAVERSAL ;
+
+   BoundaryHandling_T * handling = new BoundaryHandling_T( "fixed obstacle boundary handling", flagField, fluid,
+                                                           boost::tuples::make_tuple( MO_CLI_T ( "MO_CLI", MO_CLI_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ),
+                                                           mode);
+
+   // boundaries are set by mapping the planes into the domain
+
+   handling->fillWithDomain( FieldGhostLayers );
+
+   return handling;
+}
+
+
+class CollisionPropertiesEvaluator
+{
+public:
+   explicit CollisionPropertiesEvaluator( pe::cr::ICR & collisionResponse ) : collisionResponse_( collisionResponse )
+   {}
+
+   real_t get()
+   {
+      real_t maxPen = std::fabs(collisionResponse_.getMaximumPenetration());
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace( maxPen, mpi::MAX );
+      }
+      return maxPen;
+   }
+
+private:
+   pe::cr::ICR & collisionResponse_;
+};
+
+class ContactDistanceEvaluator
+{
+public:
+   ContactDistanceEvaluator( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID ccdID, const BlockDataID fcdID ) :
+   blocks_( blocks ), ccdID_(ccdID), fcdID_(fcdID)
+   {}
+
+   real_t get()
+   {
+      auto maximumPenetration = real_t(0);
+      for (auto it = blocks_->begin(); it != blocks_->end(); ++it) {
+         IBlock &currentBlock = *it;
+
+         auto *ccd = currentBlock.getData<pe::ccd::ICCD>(ccdID_);
+         auto *fcd = currentBlock.getData<pe::fcd::IFCD>(fcdID_);
+         ccd->generatePossibleContacts();
+         pe::Contacts& contacts = fcd->generateContacts( ccd->getPossibleContacts() );
+         size_t numContacts( contacts.size() );
+
+         for( size_t i = 0; i < numContacts; ++i )
+         {
+            const pe::ContactID c( &contacts[i] );
+            maximumPenetration = std::max( maximumPenetration, std::fabs(c->getDistance()));
+         }
+      }
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace( maximumPenetration, mpi::MAX );
+      }
+      return maximumPenetration;
+   }
+private:
+   shared_ptr< StructuredBlockStorage > blocks_;
+   const BlockDataID ccdID_;
+   const BlockDataID fcdID_;
+};
+
+class MaxVelocityEvaluator
+{
+public:
+   MaxVelocityEvaluator( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID bodyStorageID ) :
+         blocks_( blocks ), bodyStorageID_(bodyStorageID)
+   {}
+
+   Vector3<real_t> get()
+   {
+      auto maxVelX = real_t(0);
+      auto maxVelY = real_t(0);
+      auto maxVelZ = real_t(0);
+
+      for (auto it = blocks_->begin(); it != blocks_->end(); ++it) {
+
+         for( auto bodyIt = pe::LocalBodyIterator::begin(*it, bodyStorageID_ ); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt ) {
+            auto vel = bodyIt->getLinearVel();
+            maxVelX = std::max(maxVelX, std::fabs(vel[0]));
+            maxVelY = std::max(maxVelY, std::fabs(vel[1]));
+            maxVelZ = std::max(maxVelZ, std::fabs(vel[2]));
+         }
+      }
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace( maxVelX, mpi::MAX );
+         mpi::allReduceInplace( maxVelY, mpi::MAX );
+         mpi::allReduceInplace( maxVelZ, mpi::MAX );
+      }
+      return Vector3<real_t>(maxVelX, maxVelY, maxVelZ);
+   }
+
+   real_t getMagnitude()
+   {
+      auto magnitude = real_t(0);
+
+      for (auto it = blocks_->begin(); it != blocks_->end(); ++it) {
+
+         for( auto bodyIt = pe::LocalBodyIterator::begin(*it, bodyStorageID_ ); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt ) {
+            magnitude = std::max(magnitude, bodyIt->getLinearVel().length());
+         }
+      }
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::allReduceInplace( magnitude, mpi::MAX );
+      }
+      return magnitude;
+
+   }
+
+private:
+   shared_ptr< StructuredBlockStorage > blocks_;
+   const BlockDataID bodyStorageID_;
+};
+
+void evaluateFluidQuantities(const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID boundaryHandlingID,
+                             uint_t & numCells, uint_t & numFluidCells, uint_t & numNBCells )
+{
+
+   for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      auto * boundaryHandling = blockIt->getData< BoundaryHandling_T >( boundaryHandlingID );
+      auto xyzSize = boundaryHandling->getFlagField()->xyzSize();
+      numCells += xyzSize.numCells();
+
+      for(auto z = cell_idx_t(xyzSize.zMin()); z <= cell_idx_t(xyzSize.zMax()); ++z ){
+         for(auto y = cell_idx_t(xyzSize.yMin()); y <= cell_idx_t(xyzSize.yMax()); ++y ){
+            for(auto x = cell_idx_t(xyzSize.xMin()); x <= cell_idx_t(xyzSize.xMax()); ++x ) {
+               if (boundaryHandling->isDomain(x, y, z)) {
+                  ++numFluidCells;
+               }
+               if (boundaryHandling->isNearBoundary(x, y, z)) {
+                  ++numNBCells;
+               }
+            }
+         }
+      }
+   }
+}
+
+void evaluatePEQuantities( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID bodyStorageID,
+                           const pe::cr::ICR & cr,
+                           uint_t & numLocalParticles, uint_t & numShadowParticles, uint_t & numContacts)
+{
+
+   for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
+      auto * bodyStorage = blockIt->getData<pe::Storage>(bodyStorageID);
+      pe::BodyStorage const & localStorage  = (*bodyStorage)[pe::StorageType::LOCAL];
+      pe::BodyStorage const & shadowStorage = (*bodyStorage)[pe::StorageType::SHADOW];
+      numLocalParticles += localStorage.size();
+      numShadowParticles += shadowStorage.size();
+
+      numContacts += cr.getNumberOfContactsTreated();
+   }
+}
+
+void evaluateTimers(WcTimingPool & timingPool, WcTimingTree & peTimingTree,
+                    const std::vector<std::vector<std::string> > & timerKeys,
+                    std::vector<real_t> & timings )
+{
+
+   for (auto & timingsIt : timings)
+   {
+      timingsIt = real_t(0);
+   }
+
+   timingPool.unifyRegisteredTimersAcrossProcesses();
+   peTimingTree.synchronize();
+
+   auto scalingFactor = real_t(1000); // milliseconds
+
+   for (auto i = uint_t(0); i < timerKeys.size(); ++i )
+   {
+      auto keys = timerKeys[i];
+      for (const auto &timerName : keys)
+      {
+         if(timingPool.timerExists(timerName))
+         {
+            timings[i] += real_c(timingPool[timerName].total()) * scalingFactor;
+         }
+         if(peTimingTree.timerExists(timerName))
+         {
+            timings[i] += real_c(peTimingTree[timerName].total()) * scalingFactor;
+         }
+      }
+
+   }
+}
+
+void resetTimers(WcTimingPool & timingPool, WcTimingTree & peTimingTree)
+{
+   timingPool.clear();
+   peTimingTree.reset();
+}
+
+//*******************************************************************************************************************
+/*! Application to evaluate the workload (time measurements) for a fluid-particle simulation
+ *
+ * This application is used in the paper
+ *  Rettinger, Ruede - "Dynamic Load Balancing Techniques for Particulate Flow Simulations", submitted to Computation
+ * in Section 3 to develop and calibrate the workload estimator.
+ * The setup features settling particle inside a horizontally periodic box.
+ * A comprehensive description is given in Sec. 3.3 of the paper.
+ * It uses 4 x 4 x 5 blocks for domain partitioning.
+ * For each block ( = each process), the block local quantities are evaluated as well as the timing infos of
+ * the fluid-particle interaction algorithm. Those infos are then written to files that can be used later on
+ * for function fitting to obtain a workload estimator.
+ *
+ * NOTE: Since this estimator relies on timing measurements, this evaluation procedure should be carried out everytime
+ * a different implementation, hardware or algorithm is used.
+ *
+ */
+//*******************************************************************************************************************
+int main( int argc, char **argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+
+   auto solidVolumeFraction = real_t(0.2);
+
+   // LBM / numerical parameters
+   auto blockSize  = uint_t(32);
+   auto uSettling = real_t(0.1); // characteristic settling velocity
+   auto diameter = real_t(10);
+
+   auto Ga = real_t(30); //Galileo number
+   auto numPeSubCycles = uint_t(10);
+
+   auto vtkIOFreq = uint_t(0);
+   auto timestepsNonDim = real_t(2.5);
+   auto numSamples = uint_t(2000);
+   std::string baseFolder = "workload_files"; // folder for vtk and file output
+
+   bool useEllipsoids = false;
+   bool optimizeForSmallObstacleFraction = false;
+   bool noFileOutput = false;
+   bool fixBodies = false;
+   bool useEntireFieldTraversal = true;
+   bool averageForceTorqueOverTwoTimSteps = true;
+   bool useFusedStreamCollide = false;
+
+
+   for( int i = 1; i < argc; ++i )
+   {
+      if( std::strcmp( argv[i], "--vtkIOFreq"               ) == 0 ) { vtkIOFreq = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--noFileOutput"            ) == 0 ) { noFileOutput = true; continue; }
+      if( std::strcmp( argv[i], "--basefolder"              ) == 0 ) { baseFolder = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--solidVolumeFraction"     ) == 0 ) { solidVolumeFraction = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--diameter"                ) == 0 ) { diameter = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--blockSize"               ) == 0 ) { blockSize = uint_c(std::atof( argv[++i]) ); continue; }
+      if( std::strcmp( argv[i], "--uSettling"               ) == 0 ) { uSettling = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--Ga"                      ) == 0 ) { Ga = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--timestepsNonDim"         ) == 0 ) { timestepsNonDim = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--numPeSubCycles"          ) == 0 ) { numPeSubCycles = uint_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--useEllipsoids"           ) == 0 ) { useEllipsoids = true; continue; }
+      if( std::strcmp( argv[i], "--optSmallSVF"             ) == 0 ) { optimizeForSmallObstacleFraction = true; continue; }
+      if( std::strcmp( argv[i], "--fixBodies"               ) == 0 ) { fixBodies = true; continue; }
+      if( std::strcmp( argv[i], "--useEntireFieldTraversal" ) == 0 ) { useEntireFieldTraversal = true; continue; }
+      if( std::strcmp( argv[i], "--numSamples"              ) == 0 ) { numSamples = uint_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--noForceAveraging"        ) == 0 ) { averageForceTorqueOverTwoTimSteps = false; continue; }
+      if( std::strcmp( argv[i], "--useFusedStreamCollide"   ) == 0 ) { useFusedStreamCollide = true; continue; }
+      WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
+   }
+
+   WALBERLA_CHECK(diameter > real_t(1));
+   WALBERLA_CHECK(uSettling > real_t(0));
+   WALBERLA_CHECK(Ga > real_t(0));
+   WALBERLA_CHECK(solidVolumeFraction > real_t(0));
+   WALBERLA_CHECK(solidVolumeFraction < real_t(0.65));
+
+   ///////////////////////////
+   // SIMULATION PROPERTIES //
+   ///////////////////////////
+
+   const auto XBlocks = uint_t(4);
+   const auto YBlocks = uint_t(4);
+   const auto ZBlocks = uint_t(5);
+
+   if( MPIManager::instance()->numProcesses() != XBlocks * YBlocks * ZBlocks )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT("WARNING! You have specified less or more processes than number of blocks -> the time measurements are no longer blockwise!")
+   }
+
+   if( diameter > real_c(blockSize) )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT("The bodies might be too large to work with the currently used synchronization!");
+   }
+
+
+   WALBERLA_LOG_INFO_ON_ROOT("Using setup with sedimenting particles -> creating two planes and applying gravitational force")
+   if( useEllipsoids ){ WALBERLA_LOG_INFO_ON_ROOT("using ELLIPSOIDS"); }
+   else{ WALBERLA_LOG_INFO_ON_ROOT("using SPHERES"); }
+
+
+   const uint_t XCells = blockSize * XBlocks;
+   const uint_t YCells = blockSize * YBlocks;
+   const uint_t ZCells = blockSize * ZBlocks;
+
+   const real_t topWallOffset = real_t(1.05) * real_t(blockSize); // move the top wall downwards to take away a certain portion of the overall domain
+
+
+   // determine number of spheres to generate, if necessary scale diameter a bit to reach desired solid volume fraction
+   real_t domainHeight = real_c(ZCells) - topWallOffset;
+   real_t fluidVolume =  real_c( XCells * YCells ) * domainHeight;
+   real_t solidVolume = solidVolumeFraction * fluidVolume;
+   uint_t numberOfParticles = uint_c(std::ceil(solidVolume / ( math::M_PI / real_t(6) * diameter * diameter * diameter )));
+   diameter = std::cbrt( solidVolume / ( real_c(numberOfParticles) * math::M_PI / real_t(6) ) );
+
+   auto densityRatio = real_t(2.5);
+
+   real_t viscosity = uSettling * diameter / Ga;
+   const real_t omega = lbm::collision_model::omegaFromViscosity(viscosity);
+
+   const real_t gravitationalAcceleration = uSettling * uSettling / ( (densityRatio-real_t(1)) * diameter );
+
+   real_t tref = diameter / uSettling;
+   real_t Tref = domainHeight / uSettling;
+
+   uint_t timesteps = uint_c(timestepsNonDim * Tref);
+
+   const real_t dx = real_c(1);
+   WALBERLA_LOG_INFO_ON_ROOT("viscosity = " << viscosity);
+   WALBERLA_LOG_INFO_ON_ROOT("tau = " << real_t(1)/omega);
+   WALBERLA_LOG_INFO_ON_ROOT("diameter = " << diameter);
+   WALBERLA_LOG_INFO_ON_ROOT("solid volume fraction = " << solidVolumeFraction);
+   WALBERLA_LOG_INFO_ON_ROOT("domain size (in cells) = " << XCells << " x " << ZCells << " x " << ZCells);
+   WALBERLA_LOG_INFO_ON_ROOT("number of bodies = " << numberOfParticles);
+   WALBERLA_LOG_INFO_ON_ROOT("gravitational acceleration = " << gravitationalAcceleration);
+   WALBERLA_LOG_INFO_ON_ROOT("Ga = " << Ga);
+   WALBERLA_LOG_INFO_ON_ROOT("uSettling = " << uSettling);
+   WALBERLA_LOG_INFO_ON_ROOT("tref = " << tref);
+   WALBERLA_LOG_INFO_ON_ROOT("Tref = " << Tref);
+   WALBERLA_LOG_INFO_ON_ROOT("timesteps = " << timesteps);
+   WALBERLA_LOG_INFO_ON_ROOT("number of workload samples = " << numSamples);
+
+   // create folder to store logging files
+   WALBERLA_ROOT_SECTION()
+   {
+      walberla::filesystem::path path1( baseFolder );
+      if( !walberla::filesystem::exists( path1 ) )
+         walberla::filesystem::create_directory( path1 );
+   }
+
+
+   ///////////////////////////
+   // BLOCK STRUCTURE SETUP //
+   ///////////////////////////
+
+   Vector3<bool> periodicity( true );
+   periodicity[2] = false;
+
+   // create domain
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid( XBlocks, YBlocks, ZBlocks, blockSize, blockSize, blockSize, dx,
+                                                    0, false, false, //one block per process!
+                                                    periodicity[0], periodicity[1], periodicity[2], //periodicity
+                                                    false );
+
+   ////////
+   // PE //
+   ////////
+
+   shared_ptr<pe::BodyStorage> globalBodyStorage = make_shared<pe::BodyStorage>();
+   pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
+   auto bodyStorageID = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage");
+   auto ccdID = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD");
+   BlockDataID fcdID   = (useEllipsoids) ? blocks->addBlockData( pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::GJKEPACollideFunctor>(), "FCD" )
+                                         : blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD");
+
+   WcTimingTree timingTreePE;
+
+   pe::cr::HCSITS cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID, &timingTreePE );
+   cr.setMaxIterations(10);
+   cr.setRelaxationModel( pe::cr::HardContactSemiImplicitTimesteppingSolvers::ApproximateInelasticCoulombContactByDecoupling );
+   cr.setErrorReductionParameter(real_t(0.8));
+
+   /////////////////
+   // PE COUPLING //
+   /////////////////
+
+   // connect to pe
+   const real_t overlap = real_c( 1.5 ) * dx;
+
+   std::function<void(void)> syncCall = std::bind( pe::syncNextNeighbors<BodyTypeTuple>, std::ref(blocks->getBlockForest()), bodyStorageID, &timingTreePE, overlap, false );
+
+   auto generationDomain = AABB( real_t(0), real_t(0), real_t(0), real_c(XCells), real_c(YCells), real_c(ZCells) - topWallOffset);
+   auto peMaterial = pe::createMaterial( "mat", densityRatio, real_t(1), real_t(0.25), real_t(0.25), real_t(0), real_t(200), real_t(100), real_t(100), real_t(100) );
+
+   // create two planes at bottom and top of domain
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), peMaterial );
+   pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,real_c(ZCells)-topWallOffset), peMaterial );
+
+   auto xParticle = real_t(0);
+   auto yParticle = real_t(0);
+   auto zParticle = real_t(0);
+
+   for( uint_t nPart = 0; nPart < numberOfParticles; ++nPart )
+   {
+
+      WALBERLA_ROOT_SECTION()
+      {
+         xParticle = math::realRandom<real_t>(generationDomain.xMin(), generationDomain.xMax());
+         yParticle = math::realRandom<real_t>(generationDomain.yMin(), generationDomain.yMax());
+         zParticle = math::realRandom<real_t>(generationDomain.zMin(), generationDomain.zMax());
+      }
+
+      WALBERLA_MPI_SECTION()
+      {
+         mpi::broadcastObject( xParticle );
+         mpi::broadcastObject( yParticle );
+         mpi::broadcastObject( zParticle );
+      }
+
+      if( useEllipsoids )
+      {
+         // prolate ellipsoids
+         auto axisFactor = real_t(1.5);
+         real_t axisFactor2 = std::sqrt(real_t(1)/axisFactor);
+         real_t radius = diameter * real_t(0.5);
+         pe::createEllipsoid( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), Vector3<real_t>(axisFactor*radius, axisFactor2*radius, axisFactor2*radius), peMaterial );
+
+      } else{
+         pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), diameter * real_t(0.5), peMaterial );
+      }
+
+   }
+
+   syncCall();
+
+   // resolve possible overlaps of the particles due to the random initialization
+
+   // 100 iterations of solver to resolve all major overlaps
+   {
+      for (auto pet = uint_t(1); pet <= uint_t(100); ++pet )
+      {
+         cr.timestep( real_t(1) );
+         syncCall();
+
+         // reset all velocities to zero
+         for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+         {
+            for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+            {
+               bodyIt->setLinearVel(Vector3<real_t>(real_t(0)));
+               bodyIt->setAngularVel(Vector3<real_t>(real_t(0)));
+            }
+         }
+      }
+   }
+
+
+   // resolve remaining overlaps via particle simulation
+   {
+      const auto initialPeSteps = uint_t(2000);
+      const auto dt_PE_init = real_t(1);
+      const real_t overlapLimit = real_t(0.001) * diameter;
+
+      WALBERLA_LOG_INFO_ON_ROOT("Particle creation done --- resolving overlaps with goal all < " << overlapLimit / diameter * real_t(100) << "%");
+
+      CollisionPropertiesEvaluator collisionPropertiesEvaluator( cr );
+
+      ContactDistanceEvaluator contactDistanceEvaluator(blocks, ccdID, fcdID);
+      MaxVelocityEvaluator maxVelEvaluator(blocks, bodyStorageID);
+
+      for(auto pet = uint_t(1); pet <= initialPeSteps; ++pet )
+      {
+         cr.timestep( dt_PE_init );
+         syncCall();
+         real_t maxPen = collisionPropertiesEvaluator.get();
+
+         if( maxPen < overlapLimit )
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Carried out " << pet << " PE-only time steps to resolve initial overlaps");
+            WALBERLA_LOG_INFO_ON_ROOT("Final max penetration from cr is " << maxPen << " = " << maxPen / diameter * real_t(100) << "%");
+
+            break;
+         }
+
+         real_t maxMagnitude = maxVelEvaluator.getMagnitude();
+
+         if( maxMagnitude * dt_PE_init > overlapLimit)
+         {
+            // avoid too large response velocities by setting them to zero
+            for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            {
+               for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+               {
+                  bodyIt->setLinearVel(Vector3<real_t>(real_t(0)));
+                  bodyIt->setAngularVel(Vector3<real_t>(real_t(0)));
+               }
+            }
+         }
+         else
+         {
+            cr.setErrorReductionParameter(real_t(0.8));
+         }
+
+         if( pet % uint_t(20) == uint_t(0) )
+         {
+            WALBERLA_LOG_INFO_ON_ROOT(pet << " - current max overlap = " << maxPen / diameter * real_t(100) << "%, max vel magnitude = " << maxMagnitude );
+         }
+      }
+   }
+
+   // reset all velocities to zero
+   Vector3<real_t> initialBodyVelocity(real_t(0));
+
+   WALBERLA_LOG_INFO_ON_ROOT("Setting initial velocity " << initialBodyVelocity << " of all bodies");
+   for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+   {
+      for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+      {
+         bodyIt->setLinearVel(initialBodyVelocity);
+         bodyIt->setAngularVel(Vector3<real_t>(real_t(0)));
+      }
+   }
+
+   ///////////////////////
+   // ADD DATA TO BLOCKS //
+   ////////////////////////
+
+   // create the lattice model
+   LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega ) );
+
+   // add PDF field
+   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel,
+                                                                         Vector3< real_t >( real_t(0) ), real_t(1),
+                                                                         uint_t(1), field::zyxf );
+
+   // add flag field
+   BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field" );
+
+   // add body field
+   BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", nullptr, field::zyxf );
+
+   // add boundary handling & initialize outer domain boundaries
+   BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >(
+                                    MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID, useEntireFieldTraversal ), "boundary handling" );
+
+
+   // initially map pe bodies into the LBM simulation
+   pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, bodyFieldID, MO_CLI_Flag );
+
+   lbm::BlockForestEvaluation<FlagField_T> bfEval(blocks, flagFieldID, Fluid_Flag);
+
+   WALBERLA_LOG_INFO_ON_ROOT(bfEval.loggingString());
+
+   ///////////////
+   // TIME LOOP //
+   ///////////////
+
+   // create the timeloop
+   SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
+
+   if( vtkIOFreq != uint_t(0) )
+   {
+      // pe bodies
+      if(useEllipsoids)
+      {
+         auto bodyVtkOutput = make_shared<pe::EllipsoidVtkOutput>( bodyStorageID, blocks->getBlockStorage() );
+         auto bodyVTK = vtk::createVTKOutput_PointData( bodyVtkOutput, "bodies", vtkIOFreq, baseFolder );
+         timeloop.addFuncBeforeTimeStep( vtk::writeFiles( bodyVTK ), "VTK (body data)" );
+
+      }else
+      {
+         auto bodyVtkOutput = make_shared<pe::SphereVtkOutput>( bodyStorageID, blocks->getBlockStorage() );
+         auto bodyVTK = vtk::createVTKOutput_PointData( bodyVtkOutput, "bodies", vtkIOFreq, baseFolder );
+         timeloop.addFuncBeforeTimeStep( vtk::writeFiles( bodyVTK ), "VTK (body data)" );
+      }
+
+
+      // flag field
+      auto flagFieldVTK = vtk::createVTKOutput_BlockData( blocks, "flag_field", vtkIOFreq, 1, false, baseFolder );
+      flagFieldVTK->addCellDataWriter( make_shared< field::VTKWriter< FlagField_T > >( flagFieldID, "FlagField" ) );
+      timeloop.addFuncAfterTimeStep( vtk::writeFiles( flagFieldVTK ), "VTK (flag field data)" );
+
+      // pdf field
+      auto pdfFieldVTK = vtk::createVTKOutput_BlockData( blocks, "fluid_field", vtkIOFreq, 0, false, baseFolder );
+
+      field::FlagFieldCellFilter< FlagField_T > fluidFilter( flagFieldID );
+      fluidFilter.addFlag( Fluid_Flag );
+      pdfFieldVTK->addCellInclusionFilter( fluidFilter );
+
+      pdfFieldVTK->addCellDataWriter( make_shared< lbm::VelocityVTKWriter< LatticeModel_T, float > >( pdfFieldID, "VelocityFromPDF" ) );
+      pdfFieldVTK->addCellDataWriter( make_shared< lbm::DensityVTKWriter < LatticeModel_T, float > >( pdfFieldID, "DensityFromPDF" ) );
+
+      timeloop.addFuncBeforeTimeStep( vtk::writeFiles( pdfFieldVTK ), "VTK (fluid field data)" );
+
+      auto domainDecompVTK = vtk::createVTKOutput_DomainDecomposition(blocks, "domain_decomposition", vtkIOFreq, baseFolder );
+      timeloop.addFuncBeforeTimeStep( vtk::writeFiles(domainDecompVTK), "VTK (domain decomposition)");
+   }
+
+   // sweep for updating the pe body mapping into the LBM simulation
+   timeloop.add()
+         << Sweep( pe_coupling::BodyMapping< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, MO_CLI_Flag, FormerMO_Flag, pe_coupling::selectRegularBodies ), "Body Mapping" );
+
+   // sweep for restoring PDFs in cells previously occupied by pe bodies
+   typedef pe_coupling::EquilibriumReconstructor< LatticeModel_T, BoundaryHandling_T > Reconstructor_T;
+   Reconstructor_T reconstructor( blocks, boundaryHandlingID, pdfFieldID, bodyFieldID);
+   timeloop.add()
+         << Sweep( pe_coupling::PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructor_T >
+                         ( blocks, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, reconstructor, FormerMO_Flag, Fluid_Flag, pe_coupling::selectRegularBodies, optimizeForSmallObstacleFraction ), "PDF Restore" );
+
+   shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer1 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID);
+   std::function<void(void)> storeForceTorqueInCont1 = std::bind(&pe_coupling::BodiesForceTorqueContainer::store, bodiesFTContainer1);
+   shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer2 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID);
+   std::function<void(void)> setForceTorqueOnBodiesFromCont2 = std::bind(&pe_coupling::BodiesForceTorqueContainer::setOnBodies, bodiesFTContainer2);
+   shared_ptr<pe_coupling::ForceTorqueOnBodiesScaler> forceScaler = make_shared<pe_coupling::ForceTorqueOnBodiesScaler>(blocks, bodyStorageID, real_t(1));
+   std::function<void(void)> setForceScalingFactorToHalf = std::bind(&pe_coupling::ForceTorqueOnBodiesScaler::resetScalingFactor,forceScaler,real_t(0.5));
+
+   if( averageForceTorqueOverTwoTimSteps ) {
+      bodiesFTContainer2->store();
+   }
+
+
+   // setup of the LBM communication for synchronizing the pdf field between neighboring blocks
+   std::function< void () > commFunction;
+
+   blockforest::communication::UniformBufferedScheme< stencil::D3Q27 > scheme( blocks );
+   scheme.addPackInfo( make_shared< field::communication::PackInfo< PdfField_T > >( pdfFieldID ) );
+   commFunction = scheme;
+
+   auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag );
+
+   if( !useFusedStreamCollide )
+   {
+      // streaming & collide
+      timeloop.add() << Sweep( makeCollideSweep(sweep), "Collide" );
+   }
+
+   // add LBM communication function and boundary handling sweep (does the hydro force calculations and the no-slip treatment)
+   timeloop.add() << BeforeFunction( commFunction, "LBM Communication" )
+                  << Sweep( BoundaryHandling_T::getBlockSweep( boundaryHandlingID ), "Boundary Handling" );
+
+   if( useFusedStreamCollide )
+   {
+      // streaming & collide
+      timeloop.add() << Sweep( makeSharedSweep(sweep), "Stream&Collide" );
+   } else
+   {
+      // streaming & collide
+      timeloop.add() << Sweep( makeStreamSweep(sweep), "Stream" );
+
+   }
+
+   // Averaging the force/torque over two time steps is said to damp oscillations of the interaction force/torque.
+   // See Ladd - " Numerical simulations of particulate suspensions via a discretized Boltzmann equation. Part 1. Theoretical foundation", 1994, p. 302
+   if( averageForceTorqueOverTwoTimSteps ) {
+
+      // store force/torque from hydrodynamic interactions in container1
+      timeloop.addFuncAfterTimeStep(storeForceTorqueInCont1, "Force Storing");
+
+      // set force/torque from previous time step (in container2)
+      timeloop.addFuncAfterTimeStep(setForceTorqueOnBodiesFromCont2, "Force setting");
+
+      // average the force/torque by scaling it with factor 1/2 (except in first timestep, there it is 1, which it is initially)
+      timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesScaler(blocks, bodyStorageID, real_t(0.5)),  "Force averaging");
+      timeloop.addFuncAfterTimeStep( setForceScalingFactorToHalf, "Force scaling adjustment" );
+
+      // swap containers
+      timeloop.addFuncAfterTimeStep( pe_coupling::BodyContainerSwapper( bodiesFTContainer1, bodiesFTContainer2 ), "Swap FT container" );
+
+   }
+
+   real_t sphereVolume = diameter * diameter * diameter * math::M_PI / real_t(6);
+   Vector3<real_t> gravitationalForce( real_t(0), real_t(0), -(densityRatio - real_t(1)) * gravitationalAcceleration * sphereVolume );
+   timeloop.addFuncAfterTimeStep(pe_coupling::ForceOnBodiesAdder( blocks, bodyStorageID, gravitationalForce ), "Gravitational force" );
+
+   if( fixBodies ) {
+      // reset all forces
+      timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesResetter(blocks, bodyStorageID), "Force Resetting");
+   } else{
+      // add pe timesteps
+      timeloop.addFuncAfterTimeStep( pe_coupling::TimeStep( blocks, bodyStorageID, cr, syncCall, real_t(1), numPeSubCycles ), "pe Time Step" );
+   }
+
+   timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" );
+
+
+
+   ////////////////////////
+   // EXECUTE SIMULATION //
+   ////////////////////////
+
+   WcTimingPool timeloopTiming;
+
+   std::vector< std::vector<std::string> > timerKeys;
+   std::vector<std::string> LBMTimer;
+   LBMTimer.emplace_back("Stream&Collide");
+   LBMTimer.emplace_back("Stream");
+   LBMTimer.emplace_back("Collide");
+   timerKeys.push_back(LBMTimer);
+
+   std::vector<std::string> bhTimer;
+   bhTimer.emplace_back("Boundary Handling");
+   timerKeys.push_back(bhTimer);
+
+   std::vector<std::string> couplingTimer1;
+   couplingTimer1.emplace_back("Body Mapping");
+   std::vector<std::string> couplingTimer2;
+   couplingTimer2.emplace_back("PDF Restore");
+   timerKeys.push_back(couplingTimer1);
+   timerKeys.push_back(couplingTimer2);
+
+   std::vector<std::string> peTimer;
+   peTimer.emplace_back("Simulation Step.Collision Detection");
+   peTimer.emplace_back("Simulation Step.Collision Response Integration");
+   peTimer.emplace_back("Simulation Step.Collision Response Resolution.Collision Response Solving");
+   timerKeys.push_back(peTimer);
+
+   uint_t numCells, numFluidCells, numNBCells, numLocalParticles, numShadowParticles, numContacts;
+   numCells = uint_t(0);
+   numFluidCells = uint_t(0);
+   numNBCells = uint_t(0);
+   numLocalParticles = uint_t(0);
+   numShadowParticles = uint_t(0);
+   numContacts = uint_t(0);
+
+   std::vector<real_t> timings(timerKeys.size());
+
+   resetTimers(timeloopTiming, timingTreePE);
+
+   // every rank writes its own file -> numProcesses number of samples!
+   int myRank = MPIManager::instance()->rank();
+
+   std::string logFileName = baseFolder + "/load";
+   logFileName += "_settling";
+   if( useEllipsoids)
+   {
+      logFileName += "_ellipsoids";
+   }
+   else
+   {
+      logFileName += "_spheres";
+   }
+   logFileName += "_d" + std::to_string(int_c(std::ceil(diameter)));
+   logFileName += "_bs" + std::to_string(blockSize);
+   logFileName += "_" + std::to_string(myRank) + ".txt";
+
+
+   std::ofstream file;
+
+   if(!noFileOutput)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("Writing load info to file " << logFileName);
+      file.open( logFileName.c_str(), std::ofstream::app );
+      file << "# svf = " << solidVolumeFraction << ", d = " << diameter << ", domain = " << XCells << "x" << YCells << "x" << ZCells << "\n";
+   }
+
+
+   auto timeStepOfFirstTiming = uint_t(50);
+
+   if( timesteps - timeStepOfFirstTiming < numSamples )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT("Less actual time steps than number of required samples!");
+   }
+
+   uint_t nSample( 0 ); // number of current sample
+   real_t samplingFrequency = real_c(timesteps - timeStepOfFirstTiming) / real_c(numSamples);
+
+   // time loop
+   for (uint_t i = 1; i <= timesteps; ++i )
+   {
+      // perform a single simulation step
+      timeloop.singleStep( timeloopTiming );
+
+      // check if current time step should be included in sample
+      if( i >= uint_c( samplingFrequency * real_c(nSample) ) + timeStepOfFirstTiming )
+      {
+         // include -> evaluate all timers and quantities
+
+         evaluateFluidQuantities(blocks, boundaryHandlingID, numCells, numFluidCells, numNBCells);
+         evaluatePEQuantities(blocks, bodyStorageID, cr, numLocalParticles, numShadowParticles, numContacts);
+
+         evaluateTimers(timeloopTiming, timingTreePE, timerKeys, timings);
+
+         if(!noFileOutput)
+         {
+            real_t totalTime = std::accumulate(timings.begin(), timings.end(), real_t(0) );
+
+            file << timeloop.getCurrentTimeStep() << " " << real_c(timeloop.getCurrentTimeStep()) / Tref << " "
+                 << numCells << " " << numFluidCells << " " << numNBCells << " "
+                 << numLocalParticles << " " << numShadowParticles << " " << numContacts << " " << numPeSubCycles;
+            for (real_t timing : timings) {
+               file << " " << timing;
+            }
+            file << " " << totalTime << "\n";
+         }
+
+         numCells = uint_t(0);
+         numFluidCells = uint_t(0);
+         numNBCells = uint_t(0);
+         numLocalParticles = uint_t(0);
+         numShadowParticles = uint_t(0);
+         numContacts = uint_t(0);
+
+         ++nSample;
+      }
+
+      // reset timers to always include only a single time step in them
+      resetTimers(timeloopTiming, timingTreePE);
+   }
+
+   if(!noFileOutput) {
+      file.close();
+   }
+
+   //timeloopTiming.logResultOnRoot();
+
+   WALBERLA_LOG_INFO_ON_ROOT("Simulation finished!");
+
+   return 0;
+
+}
+
+} //namespace workload_evaluation
+
+int main( int argc, char **argv ){
+   workload_evaluation::main(argc, argv);
+}
diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 31e6417ae85f60488e8938511b4db0a2f2533af2..4fa0c4cd0eaf8270c116a02aceb6d80ee5dccf40 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory( AdaptiveMeshRefinementFluidParticleCoupling )
 add_subdirectory( ComplexGeometry )
 add_subdirectory( DEM )
 add_subdirectory( MeshDistance )
diff --git a/apps/benchmarks/SchaeferTurek/SchaeferTurek.cpp b/apps/benchmarks/SchaeferTurek/SchaeferTurek.cpp
index 7bb745c3abde69740019857b5e7f286dc3bf3edc..d62fac03260b19dbb9ad73e67d7daf5911fe2f34 100644
--- a/apps/benchmarks/SchaeferTurek/SchaeferTurek.cpp
+++ b/apps/benchmarks/SchaeferTurek/SchaeferTurek.cpp
@@ -84,6 +84,7 @@
 #include "lbm/refinement/BoundarySetup.h"
 #include "lbm/refinement/PdfFieldSyncPackInfo.h"
 #include "lbm/refinement/TimeStep.h"
+#include "lbm/refinement/VorticityBasedLevelDetermination.h"
 #include "lbm/sweeps/CellwiseSweep.h"
 #include "lbm/sweeps/SplitPureSweep.h"
 #include "lbm/sweeps/SplitSweep.h"
@@ -1064,109 +1065,6 @@ Set<SUID> Pseudo2DBlockStateDetermination::operator()( const std::vector< std::p
 }
 
 
-
-template< typename VectorField_T, typename Filter_T, bool Pseudo2D = false >
-class VorticityRefinement // used as a 'BlockForest::RefreshMinTargetLevelDeterminationFunction'
-{
-public:
-
-   VorticityRefinement( const ConstBlockDataID & fieldId, const Filter_T & filter,
-                       const real_t upperLimit, const real_t lowerLimit, const uint_t maxLevel ) :
-      fieldId_( fieldId ), filter_( filter ),
-      upperLimit_( upperLimit * upperLimit ), lowerLimit_( lowerLimit * lowerLimit ), maxLevel_( maxLevel )
-   {}
-
-   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
-                    std::vector< const Block * > & blocksAlreadyMarkedForRefinement,
-                    const BlockForest & forest );
-
-private:
-
-   ConstBlockDataID fieldId_;
-
-   Filter_T filter_;
-
-   real_t upperLimit_;
-   real_t lowerLimit_;
-
-   uint_t maxLevel_;
-   
-}; // class VorticityRefinement
-
-template< typename VectorField_T, typename Filter_T, bool Pseudo2D >
-void VorticityRefinement< VectorField_T, Filter_T, Pseudo2D >::operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
-                                                                           std::vector< const Block * > &, const BlockForest & )
-{
-   for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
-   {
-      const Block * const block = it->first;
-      const VectorField_T * u = block->template getData< VectorField_T >( fieldId_ );
-
-      if( u == nullptr )
-      {
-         it->second = uint_t(0);
-         continue;
-      }
-      
-      CellInterval interval = u->xyzSize();
-      Cell expand( cell_idx_c(-1), cell_idx_c(-1), Pseudo2D ? cell_idx_t(0) : cell_idx_c(-1) );
-      interval.expand( expand );
-
-      const cell_idx_t one( cell_idx_t(1) );
-      const real_t half( real_c(0.5) );
-      
-      bool refine( false );
-      bool coarsen( true );
-
-      filter_( *block );
-
-      WALBERLA_FOR_ALL_CELLS_IN_INTERVAL_XYZ( interval,
-
-         if( filter_(x,y,z) && filter_(x+one,y,z) && filter_(x-one,y,z) && filter_(x,y+one,z) && filter_(x,y-one,z) &&
-                  ( Pseudo2D || (filter_(x,y,z+one) && filter_(x,y,z-one)) ) )
-         {
-            const Vector3< real_t >& xa = u->get(x+one,y,z);
-            const Vector3< real_t >& xb = u->get(x-one,y,z);
-            const Vector3< real_t >& ya = u->get(x,y+one,z);
-            const Vector3< real_t >& yb = u->get(x,y-one,z);
-            const Vector3< real_t >  za = Pseudo2D ? Vector3< real_t >(0) : u->get(x,y,z+one);
-            const Vector3< real_t >  zb = Pseudo2D ? Vector3< real_t >(0) : u->get(x,y,z-one);
-
-            // ATTENTION: dx/y/z is assumed to be equal to '1'!
-            const real_t duzdy = half * ( ya[2] - yb[2] );
-            const real_t duydz = half * ( za[1] - zb[1] );
-            const real_t duxdz = half * ( za[0] - zb[0] );
-            const real_t duzdx = half * ( xa[2] - xb[2] );
-            const real_t duydx = half * ( xa[1] - xb[1] );
-            const real_t duxdy = half * ( ya[0] - yb[0] );
-
-            const Vector3< real_t > curl( duzdy - duydz, duxdz - duzdx, duydx - duxdy );
-            const auto curlSqr = curl.sqrLength();
-
-            if( curlSqr > lowerLimit_ )
-            {
-               coarsen = false;
-               if( curlSqr > upperLimit_ )
-                  refine = true;
-            }
-         }
-      )
-      
-      if( refine && block->getLevel() < maxLevel_ )
-      {
-         WALBERLA_ASSERT( !coarsen );
-         it->second = block->getLevel() + uint_t(1);
-      }
-      if( coarsen && block->getLevel() > uint_t(0) )
-      {
-         WALBERLA_ASSERT( !refine );
-         it->second = block->getLevel() - uint_t(1);
-      }
-   }
-}
-
-
-
 // used as a 'BlockForest::RefreshMinTargetLevelDeterminationFunction
 void keepInflowOutflowAtTheSameLevel( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
                                       std::vector< const Block * > & blocksAlreadyMarkedForRefinement,
@@ -2578,8 +2476,8 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod
       {
          const real_t lowerLimit = configBlock.getParameter< real_t >( "curlLowerLimit" );
          const real_t upperLimit = configBlock.getParameter< real_t >( "curlUpperLimit" );
-         
-         VorticityRefinement< VelocityField_T, field::FlagFieldEvaluationFilter<FlagField_T>, Is2D<LatticeModel_T>::value > vorticityRefinement(
+
+         lbm::refinement::VorticityBasedLevelDetermination< field::FlagFieldEvaluationFilter<FlagField_T>, Is2D<LatticeModel_T>::value > vorticityRefinement(
             velocityFieldId, flagFieldFilter, upperLimit, lowerLimit, configBlock.getParameter< uint_t >( "maxLevel", uint_t(0) ) );
 
          minTargetLevelDeterminationFunctions.add( vorticityRefinement );
diff --git a/src/lbm/refinement/VorticityBasedLevelDetermination.h b/src/lbm/refinement/VorticityBasedLevelDetermination.h
new file mode 100644
index 0000000000000000000000000000000000000000..c3f4ee20d66f3840876c685a148af3d49d6b3156
--- /dev/null
+++ b/src/lbm/refinement/VorticityBasedLevelDetermination.h
@@ -0,0 +1,161 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file VorticityBasedLevelDetermination.h
+//! \ingroup lbm
+//! \author Florian Schornbaum <florian.schornbaum@fau.de>
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/BlockForest.h"
+#include "core/math/Vector3.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "field/GhostLayerField.h"
+
+#include <vector>
+
+namespace walberla {
+namespace lbm {
+namespace refinement {
+
+
+/*!\brief Level determination for refinement check based on (scaled) vorticity values
+ *
+ * If (scaled) vorticity magnitude is below lowerLimit in all cells of a block, that block could be coarsened.
+ * If the (scaled) vorticity value is above the upperLimit for at least one cell, that block gets marked for refinement.
+ * Else, the block remains on the current level.
+ *
+ * The scaling originates from neglecting the actual mesh size on the block to obtain different vorticity values for
+ * different mesh sizes.
+ */
+template< typename Filter_T, bool Pseudo2D = false >
+class VorticityBasedLevelDetermination // used as a 'BlockForest::RefreshMinTargetLevelDeterminationFunction'
+{
+
+public:
+
+   typedef GhostLayerField< Vector3<real_t>, 1 >  VectorField_T;
+
+   VorticityBasedLevelDetermination( const ConstBlockDataID & fieldID, const Filter_T & filter,
+                                     const real_t upperLimit, const real_t lowerLimit, const uint_t maxLevel ) :
+         fieldID_( fieldID ), filter_( filter ),
+         upperLimit_( upperLimit * upperLimit ), lowerLimit_( lowerLimit * lowerLimit ), maxLevel_( maxLevel )
+   {}
+
+   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                    std::vector< const Block * > & blocksAlreadyMarkedForRefinement,
+                    const BlockForest & forest );
+
+private:
+
+   ConstBlockDataID fieldID_;
+
+   Filter_T filter_;
+
+   real_t upperLimit_;
+   real_t lowerLimit_;
+
+   uint_t maxLevel_;
+
+}; // class VorticityRefinement
+
+template< typename Filter_T, bool Pseudo2D >
+void VorticityBasedLevelDetermination< Filter_T, Pseudo2D >::operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                                                                         std::vector< const Block * > &, const BlockForest & )
+{
+   for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
+   {
+      const Block * const block = it->first;
+      const VectorField_T * u = block->template getData< VectorField_T >( fieldID_ );
+
+      if( u == nullptr )
+      {
+         it->second = uint_t(0);
+         continue;
+      }
+
+      WALBERLA_ASSERT_GREATER_EQUAL(u->nrOfGhostLayers(), uint_t(1));
+
+      CellInterval interval = u->xyzSize();
+      Cell expand( cell_idx_c(-1), cell_idx_c(-1), Pseudo2D ? cell_idx_t(0) : cell_idx_c(-1) );
+      interval.expand( expand );
+
+      const cell_idx_t one( cell_idx_t(1) );
+      const real_t half( real_c(0.5) );
+
+      bool refine( false );
+      bool coarsen( true );
+
+      filter_( *block );
+
+      const cell_idx_t xSize = cell_idx_c( interval.xSize() );
+      const cell_idx_t ySize = cell_idx_c( interval.ySize() );
+      const cell_idx_t zSize = cell_idx_c( interval.zSize() );
+
+      for( cell_idx_t z = cell_idx_t(0); z < zSize; ++z ) {
+         for (cell_idx_t y = cell_idx_t(0); y < ySize; ++y) {
+            for (cell_idx_t x = cell_idx_t(0); x < xSize; ++x) {
+               if( filter_(x,y,z) && filter_(x+one,y,z) && filter_(x-one,y,z) && filter_(x,y+one,z) && filter_(x,y-one,z) &&
+                   ( Pseudo2D || (filter_(x,y,z+one) && filter_(x,y,z-one)) ) )
+               {
+                  const Vector3< real_t > xa = u->get(x+one,y,z);
+                  const Vector3< real_t > xb = u->get(x-one,y,z);
+                  const Vector3< real_t > ya = u->get(x,y+one,z);
+                  const Vector3< real_t > yb = u->get(x,y-one,z);
+                  const Vector3< real_t > za = Pseudo2D ? Vector3< real_t >(0) : u->get(x,y,z+one);
+                  const Vector3< real_t > zb = Pseudo2D ? Vector3< real_t >(0) : u->get(x,y,z-one);
+
+                  const real_t duzdy = half * ( ya[2] - yb[2] );
+                  const real_t duydz = half * ( za[1] - zb[1] );
+                  const real_t duxdz = half * ( za[0] - zb[0] );
+                  const real_t duzdx = half * ( xa[2] - xb[2] );
+                  const real_t duydx = half * ( xa[1] - xb[1] );
+                  const real_t duxdy = half * ( ya[0] - yb[0] );
+
+                  // since no dx was used in the gradient calculation, this value is actually curl * dx
+                  // this is needed to have changing curl values on different grid levels
+                  const Vector3< real_t > curl( duzdy - duydz, duxdz - duzdx, duydx - duxdy );
+                  const auto curlSqr = curl.sqrLength();
+
+                  if( curlSqr > lowerLimit_ )
+                  {
+                     coarsen = false;
+                     if( curlSqr > upperLimit_ )
+                        refine = true;
+                  }
+               }
+            }
+         }
+      }
+
+      if( refine && block->getLevel() < maxLevel_ )
+      {
+         WALBERLA_ASSERT( !coarsen );
+         it->second = block->getLevel() + uint_t(1);
+      }
+      if( coarsen && block->getLevel() > uint_t(0) )
+      {
+         WALBERLA_ASSERT( !refine );
+         it->second = block->getLevel() - uint_t(1);
+      }
+   }
+}
+
+} // namespace refinement
+} // namespace lbm
+} // namespace walberla
diff --git a/src/lbm/refinement/all.h b/src/lbm/refinement/all.h
index a3a353488c9529dab22749a5735954a4551b0d1b..a63a1ba388c87af4a16b639e81ed7acd5ed3c8ab 100644
--- a/src/lbm/refinement/all.h
+++ b/src/lbm/refinement/all.h
@@ -29,3 +29,4 @@
 #include "TimeStep.h"
 #include "TimeStepPdfPackInfo.h"
 #include "TimeTracker.h"
+#include "VorticityBasedLevelDetermination.h"
diff --git a/src/pe/communication/rigidbody/Union.h b/src/pe/communication/rigidbody/Union.h
index 544c131efff56184a82296a67c073784b0f585a4..67ba4db6d4679ec26e67766b20e370fa65aec99b 100644
--- a/src/pe/communication/rigidbody/Union.h
+++ b/src/pe/communication/rigidbody/Union.h
@@ -123,8 +123,7 @@ inline std::unique_ptr<Union<BodyTypeTuple>> instantiate( mpi::RecvBuffer& buffe
                                                      false,
                                                      subobjparam.communicating_,
                                                      subobjparam.infiniteMass_ );
-   un->setLinearVel( subobjparam.v_ );
-   un->setAngularVel( subobjparam.w_ );
+
    un->MPITrait.setOwner( subobjparam.mpiTrait_.owner_ );
 
    // Decoding the contained primitives
@@ -136,7 +135,8 @@ inline std::unique_ptr<Union<BodyTypeTuple>> instantiate( mpi::RecvBuffer& buffe
       obj->setRemote( true );
       un->add(std::move(obj));
    }
-
+   un->setLinearVel( subobjparam.v_ );
+   un->setAngularVel( subobjparam.w_ );
    newBody = un.get();
    return un;
 }
diff --git a/src/pe_coupling/all.h b/src/pe_coupling/all.h
index 2dfc25e1272ad3e361b9d8bd094aa0f09fb4ce8d..53ba7b855d0848c8010f66bd34afa4ee29ba9d78 100644
--- a/src/pe_coupling/all.h
+++ b/src/pe_coupling/all.h
@@ -24,6 +24,7 @@
 
 #pragma once
 
+#include "amr/all.h"
 #include "discrete_particle_methods/all.h"
 #include "mapping/all.h"
 #include "geometry/all.h"
diff --git a/src/pe_coupling/amr/BlockInfo.h b/src/pe_coupling/amr/BlockInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..6d4873852ce3678479da0b1bcdf66fe51a12b124
--- /dev/null
+++ b/src/pe_coupling/amr/BlockInfo.h
@@ -0,0 +1,88 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BlockInfo.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <core/mpi/RecvBuffer.h>
+#include <core/mpi/SendBuffer.h>
+
+#include <ostream>
+
+namespace walberla {
+namespace pe_coupling {
+
+struct BlockInfo {
+   // lbm quantities
+   uint_t numberOfCells;
+   uint_t numberOfFluidCells;
+   uint_t numberOfNearBoundaryCells;
+   // pe quantities
+   uint_t numberOfLocalBodies;
+   uint_t numberOfShadowBodies;
+   uint_t numberOfContacts;
+   // coupling quantities
+   uint_t numberOfPeSubCycles;
+
+   BlockInfo()
+         : numberOfCells(0), numberOfFluidCells(0), numberOfNearBoundaryCells(0),
+           numberOfLocalBodies(0), numberOfShadowBodies(0), numberOfContacts(0),
+           numberOfPeSubCycles(0) {}
+
+   BlockInfo(const uint_t numCells, const uint_t numFluidCells, const uint_t numNearBoundaryCells,
+             const uint_t numLocalBodies, const uint_t numShadowBodies, const uint_t numContacts,
+             const uint_t numPeSubCycles)
+         : numberOfCells(numCells), numberOfFluidCells(numFluidCells), numberOfNearBoundaryCells(numNearBoundaryCells),
+           numberOfLocalBodies(numLocalBodies), numberOfShadowBodies(numShadowBodies), numberOfContacts(numContacts),
+           numberOfPeSubCycles(numPeSubCycles) {}
+};
+
+
+inline
+std::ostream& operator<<( std::ostream& os, const BlockInfo& bi )
+{
+   os << bi.numberOfCells << " / " << bi.numberOfFluidCells << " / " << bi.numberOfNearBoundaryCells << " / "
+      << bi.numberOfLocalBodies << " / " << bi.numberOfShadowBodies << " / " << bi.numberOfContacts << " / "
+      << bi.numberOfPeSubCycles;
+   return os;
+}
+
+template< typename T,    // Element type of SendBuffer
+          typename G>    // Growth policy of SendBuffer
+mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const BlockInfo& info )
+{
+   buf.addDebugMarker( "pca" );
+   buf << info.numberOfCells << info.numberOfFluidCells << info.numberOfNearBoundaryCells
+       << info.numberOfLocalBodies << info.numberOfShadowBodies << info.numberOfContacts
+       << info.numberOfPeSubCycles;
+   return buf;
+}
+
+template< typename T>    // Element type of SendBuffer
+mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, BlockInfo& info )
+{
+   buf.readDebugMarker( "pca" );
+   buf >> info.numberOfCells >> info.numberOfFluidCells >> info.numberOfNearBoundaryCells
+       >> info.numberOfLocalBodies >> info.numberOfShadowBodies >> info.numberOfContacts
+       >> info.numberOfPeSubCycles;
+   return buf;
+}
+
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/InfoCollection.cpp b/src/pe_coupling/amr/InfoCollection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..19238621785912cc6ee8bcfdaf3466c93a5b058b
--- /dev/null
+++ b/src/pe_coupling/amr/InfoCollection.cpp
@@ -0,0 +1,100 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InfoCollection.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+
+#include "InfoCollection.h"
+
+namespace walberla {
+namespace pe_coupling {
+
+
+void getBlockInfoFromInfoCollection( const PhantomBlock * block, const shared_ptr<InfoCollection>& ic,
+                                     BlockInfo & blockInfo )
+{
+   WALBERLA_ASSERT_NOT_NULLPTR(block);
+
+   if (block->sourceBlockIsLarger())
+   {
+      // block is a result of refinement -> BlockInfo object only available for the father block
+      // there should be no particles on the block (otherwise it would not have been refined)
+      // and refinement in LBM does not change the number of cells
+      // we assume that the number of fluid and near boundary cells also stays the same
+      // (ATTENTION: not true for blocks intersecting with a boundary!)
+      // -> we can use the information of the father block for weight assignment
+
+      auto infoIt = ic->find( block->getId().getFatherId() );
+      WALBERLA_CHECK_UNEQUAL( infoIt, ic->end(), "Father block with ID " << block->getId().getFatherId() << " not found in info collection!" );
+
+      // check the above mentioned assumptions
+      WALBERLA_ASSERT_EQUAL(infoIt->second.numberOfLocalBodies, uint_t(0));
+      WALBERLA_ASSERT_EQUAL(infoIt->second.numberOfShadowBodies, uint_t(0));
+      WALBERLA_ASSERT_EQUAL(infoIt->second.numberOfContacts, uint_t(0));
+
+      blockInfo = infoIt->second;
+   }
+   else if (block->sourceBlockHasTheSameSize())
+   {
+      auto infoIt = ic->find( block->getId() );
+      WALBERLA_CHECK_UNEQUAL( infoIt, ic->end(), "Block with ID " << block->getId() << " not found in info collection!" );
+      blockInfo = infoIt->second;
+   }
+   else
+   {
+      // source block of block is smaller
+
+      // block is a result of coarsening -> BlockInfo object is available on all 8 child blocks
+      // there should be no particles on the block (otherwise it would not have been coarsened)
+      // and refinement in LBM does not change the number of cells
+      // we assume that the number of fluid and near boundary cells will be the average of all 8 child blocks
+      // -> we can use the information of the child blocks for weight assignment
+
+      blockforest::BlockID childIdForInit(block->getId(), 0);
+      auto childForInitIt = ic->find( childIdForInit );
+      WALBERLA_CHECK_UNEQUAL( childForInitIt, ic->end(), "Child block with ID " << childIdForInit << " not found in info collection!" );
+      BlockInfo combinedInfo = childForInitIt->second;
+      uint_t numFluidCells(0), numNearBoundaryCells(0);
+      for (uint_t child = 0; child < 8; ++child)
+      {
+         blockforest::BlockID childId(block->getId(), child);
+         auto childIt = ic->find( childId );
+         WALBERLA_CHECK_UNEQUAL( childIt, ic->end(), "Child block with ID " << childId << " not found in info collection!" );
+         numFluidCells += childIt->second.numberOfFluidCells;
+         numNearBoundaryCells += childIt->second.numberOfNearBoundaryCells;
+
+         // check above mentioned assumptions
+         WALBERLA_ASSERT_EQUAL(childIt->second.numberOfLocalBodies, uint_t(0));
+         WALBERLA_ASSERT_EQUAL(childIt->second.numberOfShadowBodies, uint_t(0));
+         WALBERLA_ASSERT_EQUAL(childIt->second.numberOfContacts, uint_t(0));
+      }
+      // total number of cells remains unchanged
+      combinedInfo.numberOfFluidCells = uint_c(numFluidCells / uint_t(8)); //average
+      combinedInfo.numberOfNearBoundaryCells = uint_c( numNearBoundaryCells / uint_t(8) ); //average
+      combinedInfo.numberOfLocalBodies = uint_t(0);
+      combinedInfo.numberOfShadowBodies = uint_t(0);
+      combinedInfo.numberOfContacts = uint_t(0); //sum
+      // number of pe sub cycles stays the same
+
+      blockInfo = combinedInfo;
+   }
+
+}
+
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/InfoCollection.h b/src/pe_coupling/amr/InfoCollection.h
new file mode 100644
index 0000000000000000000000000000000000000000..86e508e87e9e183fab6464eae655dbd6372984b1
--- /dev/null
+++ b/src/pe_coupling/amr/InfoCollection.h
@@ -0,0 +1,121 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InfoCollection.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "BlockInfo.h"
+
+#include "blockforest/BlockForest.h"
+#include "blockforest/BlockID.h"
+#include "core/mpi/BufferSystem.h"
+
+#include "pe/ccd/ICCD.h"
+#include "pe/fcd/IFCD.h"
+#include "pe/rigidbody/BodyStorage.h"
+
+#include <map>
+
+namespace walberla {
+namespace pe_coupling {
+
+typedef std::map<blockforest::BlockID, BlockInfo>  InfoCollection;
+typedef std::pair<blockforest::BlockID, BlockInfo> InfoCollectionPair;
+
+template <typename BoundaryHandling_T>
+void createWithNeighborhood(BlockForest& bf, const BlockDataID boundaryHandlingID,
+                            const BlockDataID bodyStorageID, const BlockDataID ccdID, const BlockDataID fcdID,
+                            const uint_t numberOfPeSubCycles,
+                            InfoCollection& ic )
+{
+   ic.clear();
+
+   mpi::BufferSystem bs( MPIManager::instance()->comm(), 856 );
+
+   for (auto blockIt = bf.begin(); blockIt != bf.end(); ++blockIt)
+   {
+      auto * block = static_cast<blockforest::Block*> (&(*blockIt));
+
+      // evaluate LBM quantities
+      BoundaryHandling_T * boundaryHandling = blockIt->getData< BoundaryHandling_T >( boundaryHandlingID );
+      auto xyzSize = boundaryHandling->getFlagField()->xyzSize();
+      const uint_t numCells = xyzSize.numCells();
+      uint_t numFluidCells(0), numNearBoundaryCells(0);
+      for( auto cellIt = xyzSize.begin(); cellIt != xyzSize.end(); ++cellIt)
+      {
+         if( boundaryHandling->isDomain(*cellIt) )
+         {
+            ++numFluidCells;
+         }
+         if( boundaryHandling->isNearBoundary(*cellIt))
+         {
+            ++numNearBoundaryCells;
+         }
+      }
+
+      // evaluate PE quantities
+      auto * bodyStorage = block->getData<pe::Storage>(bodyStorageID);
+      pe::BodyStorage const & localStorage  = (*bodyStorage)[pe::StorageType::LOCAL];
+      pe::BodyStorage const & shadowStorage = (*bodyStorage)[pe::StorageType::SHADOW];
+      const uint_t numLocalParticles = localStorage.size();
+      const uint_t numShadowParticles = shadowStorage.size();
+
+      auto * ccd = block->getData<pe::ccd::ICCD>(ccdID);
+      auto * fcd = block->getData<pe::fcd::IFCD>(fcdID);
+      ccd->generatePossibleContacts();
+      pe::Contacts& contacts = fcd->generateContacts( ccd->getPossibleContacts() );
+      const uint_t numContacts = contacts.size();
+
+      BlockInfo blockInfo(numCells, numFluidCells, numNearBoundaryCells, numLocalParticles, numShadowParticles, numContacts, numberOfPeSubCycles);
+      InfoCollectionPair infoCollectionEntry(block->getId(), blockInfo);
+
+      ic.insert( infoCollectionEntry );
+
+      for( auto nb = uint_t(0); nb < block->getNeighborhoodSize(); ++nb )
+      {
+         bs.sendBuffer( block->getNeighborProcess(nb) ) << infoCollectionEntry;
+      }
+
+      //note: is it necessary to add child blocks already into the info collection?
+      // here, we still have full geometrical information and can probably determine number of fluid and near boundary cells more easily
+      // however, the interesting (and most costly) blocks are never refined and thus their child infos is never needed
+      // see pe/amr/InfoCollection.cpp for an example
+
+   }
+
+   // size of buffer is unknown and changes with each send
+   bs.setReceiverInfoFromSendBufferState(false, true);
+   bs.sendAll();
+
+   for( auto recvIt = bs.begin(); recvIt != bs.end(); ++recvIt )
+   {
+      while( !recvIt.buffer().isEmpty() )
+      {
+         InfoCollectionPair val;
+         recvIt.buffer() >> val;
+         ic.insert(val);
+      }
+   }
+}
+
+void getBlockInfoFromInfoCollection( const PhantomBlock * block, const shared_ptr<InfoCollection>& ic, BlockInfo & blockInfo );
+
+
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/all.h b/src/pe_coupling/amr/all.h
new file mode 100644
index 0000000000000000000000000000000000000000..da4f4edfa80ea7004ca8903c15585a416bc807f2
--- /dev/null
+++ b/src/pe_coupling/amr/all.h
@@ -0,0 +1,29 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file all.h
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//! \brief Collective header file for module pe_coupling
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "BlockInfo.h"
+#include "InfoCollection.h"
+
+#include "level_determination/all.h"
+#include "weight_assignment/all.h"
\ No newline at end of file
diff --git a/src/pe_coupling/amr/level_determination/BodyPresenceLevelDetermination.cpp b/src/pe_coupling/amr/level_determination/BodyPresenceLevelDetermination.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5eafb9408ce8ac70bbc56f6201d2e7248545df44
--- /dev/null
+++ b/src/pe_coupling/amr/level_determination/BodyPresenceLevelDetermination.cpp
@@ -0,0 +1,81 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BodyPresenceLevelDetermination.cpp
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "BodyPresenceLevelDetermination.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+
+void BodyPresenceLevelDetermination::operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                                                 std::vector< const Block * > &, const BlockForest & /*forest*/ )
+{
+   for (auto &minTargetLevel : minTargetLevels) {
+      uint_t currentLevelOfBlock = minTargetLevel.first->getLevel();
+
+      const uint_t numberOfParticlesInDirectNeighborhood = getNumberOfLocalAndShadowBodiesInNeighborhood(minTargetLevel.first);
+
+      uint_t targetLevelOfBlock = currentLevelOfBlock; //keep everything as it is
+      if ( numberOfParticlesInDirectNeighborhood > uint_t(0) )
+      {
+         // set block to finest level if there are bodies nearby
+         targetLevelOfBlock = finestLevel_;
+      }
+      else
+      {
+         // block could coarsen since there are no bodies nearby
+         if( currentLevelOfBlock > uint_t(0) )
+            targetLevelOfBlock = currentLevelOfBlock - uint_t(1);
+      }
+
+      WALBERLA_CHECK_LESS_EQUAL(std::abs(int_c(targetLevelOfBlock) - int_c(currentLevelOfBlock)), uint_t(1), "Only level difference of maximum 1 allowed!");
+      minTargetLevel.second = targetLevelOfBlock;
+   }
+}
+
+uint_t BodyPresenceLevelDetermination::getNumberOfLocalAndShadowBodiesInNeighborhood(const Block * block)
+{
+   auto numBodies = uint_t(0);
+
+   // add bodies of current block
+   const auto infoIt = infoCollection_->find(block->getId());
+   WALBERLA_CHECK_UNEQUAL(infoIt, infoCollection_->end(), "Block with ID " << block->getId() << " not found in info collection!");
+
+   numBodies += infoIt->second.numberOfLocalBodies;
+   numBodies += infoIt->second.numberOfShadowBodies;
+
+   // add bodies of all neighboring blocks
+   for(uint_t i = 0; i < block->getNeighborhoodSize(); ++i)
+   {
+      const BlockID &neighborBlockID = block->getNeighborId(i);
+      const auto infoItNeighbor = infoCollection_->find(neighborBlockID);
+      WALBERLA_CHECK_UNEQUAL(infoItNeighbor, infoCollection_->end(), "Neighbor block with ID " << neighborBlockID << " not found in info collection!");
+
+      numBodies += infoItNeighbor->second.numberOfLocalBodies;
+      numBodies += infoItNeighbor->second.numberOfShadowBodies;
+   }
+   return numBodies;
+}
+
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/level_determination/BodyPresenceLevelDetermination.h b/src/pe_coupling/amr/level_determination/BodyPresenceLevelDetermination.h
new file mode 100644
index 0000000000000000000000000000000000000000..e57d318f01045172a6f0a43b9479dd19d5a1c0b4
--- /dev/null
+++ b/src/pe_coupling/amr/level_determination/BodyPresenceLevelDetermination.h
@@ -0,0 +1,62 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BodyPresenceLevelDetermination.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/BlockForest.h"
+#include "pe_coupling/amr/InfoCollection.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+/*
+ * Class to determine the minimum level a block can be.
+ * For coupled LBM-PE simulations the following rules apply:
+ *  - a moving body will always remain on the finest block
+ *  - a moving body is not allowed to extend into an area with a coarser block
+ *  - if no moving body is present, the level can be as coarse as possible (restricted by the 2:1 rule)
+ * Therefore, if a body, local or remote (due to bodies that are larger than a block), is present on any of the
+ * neighboring blocks of a certain block, this block's target level is the finest level.
+ * This, together with a refinement checking frequency that depends on the maximum translational body velocity,
+ * ensures the above given requirements.
+ */
+class BodyPresenceLevelDetermination
+{
+public:
+
+   BodyPresenceLevelDetermination( const shared_ptr<pe_coupling::InfoCollection> & infoCollection, uint_t finestLevel) :
+         infoCollection_( infoCollection ), finestLevel_( finestLevel)
+   {}
+
+   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                    std::vector< const Block * > &, const BlockForest & /*forest*/ );
+
+private:
+
+   uint_t getNumberOfLocalAndShadowBodiesInNeighborhood(const Block * block);
+
+   shared_ptr<pe_coupling::InfoCollection> infoCollection_;
+   uint_t finestLevel_;
+};
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/level_determination/GlobalBodyPresenceLevelDetermination.cpp b/src/pe_coupling/amr/level_determination/GlobalBodyPresenceLevelDetermination.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c99118611a8afb369ec7f2019fa2be4d57f1ba83
--- /dev/null
+++ b/src/pe_coupling/amr/level_determination/GlobalBodyPresenceLevelDetermination.cpp
@@ -0,0 +1,89 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GlobalBodyPresenceLevelDetermination.cpp
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "GlobalBodyPresenceLevelDetermination.h"
+
+#include "pe_coupling/geometry/PeOverlapFraction.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+void GlobalBodyPresenceLevelDetermination::operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                                                       std::vector< const Block * > &, const BlockForest & /*forest*/ )
+{
+   for (auto &minTargetLevel : minTargetLevels) {
+      uint_t currentLevelOfBlock = minTargetLevel.first->getLevel();
+
+      auto blockExtendedAABB = minTargetLevel.first->getAABB().getExtended(blockExtensionLength_);
+      bool blockPartiallyOverlapsWithGlobalBodies = checkForPartialOverlapWithGlobalBodies(blockExtendedAABB);
+
+      uint_t targetLevelOfBlock = currentLevelOfBlock; //keep everything as it is
+      if ( blockPartiallyOverlapsWithGlobalBodies )
+      {
+         // set block to finest level since overlap with at least one global body is present
+         targetLevelOfBlock = finestLevel_;
+      }
+      else
+      {
+         // block could coarsen since there are no overlaps with global bodies
+         if( currentLevelOfBlock > uint_t(0) )
+            targetLevelOfBlock = currentLevelOfBlock - uint_t(1);
+      }
+
+      WALBERLA_ASSERT_LESS_EQUAL(std::abs(int_c(targetLevelOfBlock) - int_c(currentLevelOfBlock)), uint_t(1), "Only level difference of maximum 1 allowed!");
+      minTargetLevel.second = targetLevelOfBlock;
+   }
+}
+
+bool GlobalBodyPresenceLevelDetermination::checkForPartialOverlapWithGlobalBodies(const AABB& box)
+{
+   const Vector3<real_t> boxMidPoint( box.min() + real_t(0.5) * box.sizes());
+   const Vector3<real_t> dxVec( box.sizes() );
+   const auto maxDepthSuperSampling = uint_t(2);
+
+   bool partialOverlapWithAllBodies = false;
+
+   for( auto bodyIt = globalBodyStorage_->begin(); bodyIt != globalBodyStorage_->end(); ++bodyIt )
+   {
+      auto bodyID = bodyIt.getBodyID();
+      if( globalBodySelectorFct_(bodyID))
+      {
+         real_t overlapFraction = pe_coupling::overlapFractionPe(*bodyID, boxMidPoint, dxVec, maxDepthSuperSampling);
+
+         // check for partial overlap
+         if( overlapFraction > real_t(0) && overlapFraction < real_t(1))
+         {
+            partialOverlapWithAllBodies = true;
+         }
+         // if fully contained in at least one body, no partial overlap possible
+         if( floatIsEqual(overlapFraction, real_t(1) ))
+         {
+            partialOverlapWithAllBodies = false;
+            break;
+         }
+      }
+   }
+   return partialOverlapWithAllBodies;
+}
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/level_determination/GlobalBodyPresenceLevelDetermination.h b/src/pe_coupling/amr/level_determination/GlobalBodyPresenceLevelDetermination.h
new file mode 100644
index 0000000000000000000000000000000000000000..1acdd271febd925de73daefd21657ac642d9c0aa
--- /dev/null
+++ b/src/pe_coupling/amr/level_determination/GlobalBodyPresenceLevelDetermination.h
@@ -0,0 +1,66 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GlobalBodyPresenceLevelDetermination.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/BlockForest.h"
+#include "pe/rigidbody/BodyStorage.h"
+#include "pe_coupling/utility/BodySelectorFunctions.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+/*
+ * Class to determine the minimum level a block can be based on presence of global bodies.
+ * Only the global bodies that are given by the body selection function are considered.
+ * If a global body partially overlaps with a block this is block is determined to be on the finest grid.
+ * The block can be extended by the given extension length (e.g. the number of ghost layers * dx).
+ * This ensures correctness of the body mapping across block borders.
+ * Note: Blocks that are fully contained inside a global body can have any level.
+ */
+class GlobalBodyPresenceLevelDetermination
+{
+public:
+
+   GlobalBodyPresenceLevelDetermination( const shared_ptr<pe::BodyStorage> & globalBodyStorage,
+                                         uint_t finestLevel, real_t blockExtensionLength = real_t(0),
+                                         const std::function<bool(pe::BodyID)> & globalBodySelectorFct = selectAllBodies) :
+         globalBodyStorage_( globalBodyStorage ), finestLevel_( finestLevel ),
+         blockExtensionLength_( blockExtensionLength ),
+         globalBodySelectorFct_( globalBodySelectorFct )
+   {}
+
+   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
+                    std::vector< const Block * > &, const BlockForest & /*forest*/ );
+
+private:
+
+   bool checkForPartialOverlapWithGlobalBodies(const AABB& box);
+
+   shared_ptr<pe::BodyStorage> globalBodyStorage_;
+   uint_t finestLevel_;
+   real_t blockExtensionLength_;
+   std::function<bool(pe::BodyID)> globalBodySelectorFct_;
+};
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/level_determination/all.h b/src/pe_coupling/amr/level_determination/all.h
new file mode 100644
index 0000000000000000000000000000000000000000..de0bbcaf5468a48b0f74d59535372f7d64f34d0b
--- /dev/null
+++ b/src/pe_coupling/amr/level_determination/all.h
@@ -0,0 +1,26 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file all.h
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//! \brief Collective header file for module pe_coupling
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "BodyPresenceLevelDetermination.h"
+#include "GlobalBodyPresenceLevelDetermination.h"
diff --git a/src/pe_coupling/amr/weight_assignment/MetisAssignmentFunctor.cpp b/src/pe_coupling/amr/weight_assignment/MetisAssignmentFunctor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce01d28700bf3f989b3f6367c9553464a9c85f0e
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/MetisAssignmentFunctor.cpp
@@ -0,0 +1,100 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file MetisAssignmentFunctor.cpp
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "MetisAssignmentFunctor.h"
+#include "stencil/D3Q27.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+void MetisAssignmentFunctor::operator()( std::vector< std::pair< const PhantomBlock *, walberla::any > > & blockData, const PhantomBlockForest & /*phantomBlockForest*/)
+{
+   for (auto &it : blockData) {
+      const PhantomBlock * block = it.first;
+      //only change of one level is supported!
+      WALBERLA_ASSERT_LESS( std::abs(int_c(block->getLevel()) - int_c(block->getSourceLevel())), 2 );
+
+      BlockInfo blockInfo;
+      pe_coupling::getBlockInfoFromInfoCollection(block, ic_, blockInfo);
+
+
+      std::vector<int64_t> metisVertexWeights(ncon_);
+
+      for( auto con = uint_t(0); con < ncon_; ++con )
+      {
+         real_t vertexWeight = std::max(weightEvaluationFct_[con](blockInfo), blockBaseWeight_);
+
+         int64_t metisVertexWeight = int64_c( vertexWeight );
+
+         WALBERLA_ASSERT_GREATER(metisVertexWeight, int64_t(0));
+         metisVertexWeights[con] = metisVertexWeight;
+      }
+
+      blockforest::DynamicParMetisBlockInfo info( metisVertexWeights );
+
+      info.setVertexCoords(it.first->getAABB().center() );
+
+      real_t blockVolume = it.first->getAABB().volume();
+      real_t approximateEdgeLength = std::cbrt( blockVolume );
+
+      int64_t faceNeighborWeight = int64_c(approximateEdgeLength * approximateEdgeLength ); //common face
+      int64_t edgeNeighborWeight = int64_c(approximateEdgeLength); //common edge
+      int64_t cornerNeighborWeight = int64_c( 1 ); //common corner
+
+      int64_t vertexSize = int64_c(blockVolume);
+      info.setVertexSize( vertexSize );
+
+      for( const uint_t idx : blockforest::getFaceNeighborhoodSectionIndices() )
+      {
+         for( auto nb = uint_t(0); nb < it.first->getNeighborhoodSectionSize(idx); ++nb )
+         {
+            auto neighborBlockID = it.first->getNeighborId(idx,nb);
+            info.setEdgeWeight(neighborBlockID, faceNeighborWeight );
+         }
+      }
+
+      for( const uint_t idx : blockforest::getEdgeNeighborhoodSectionIndices() )
+      {
+         for( auto nb = uint_t(0); nb < it.first->getNeighborhoodSectionSize(idx); ++nb )
+         {
+            auto neighborBlockID = it.first->getNeighborId(idx,nb);
+            info.setEdgeWeight(neighborBlockID, edgeNeighborWeight );
+         }
+      }
+
+      for( const uint_t idx : blockforest::getCornerNeighborhoodSectionIndices() )
+      {
+         for( auto nb = uint_t(0); nb < it.first->getNeighborhoodSectionSize(idx); ++nb )
+         {
+            auto neighborBlockID = it.first->getNeighborId(idx,nb);
+            info.setEdgeWeight(neighborBlockID, cornerNeighborWeight );
+         }
+      }
+
+      it.second = info;
+
+   }
+}
+
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/weight_assignment/MetisAssignmentFunctor.h b/src/pe_coupling/amr/weight_assignment/MetisAssignmentFunctor.h
new file mode 100644
index 0000000000000000000000000000000000000000..384746aa6c3777add20c1283fde6c2ca06523a09
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/MetisAssignmentFunctor.h
@@ -0,0 +1,68 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file MetisAssignmentFunctor.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "pe_coupling/amr/InfoCollection.h"
+
+#include "blockforest/loadbalancing/DynamicParMetis.h"
+
+#include <functional>
+#include <vector>
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+class MetisAssignmentFunctor
+{
+public:
+
+   MetisAssignmentFunctor( shared_ptr<InfoCollection>& ic,
+                           const std::function<real_t(const BlockInfo&)> & weightEvaluationFct,
+                           const uint_t ncon = 1)
+      : ic_(ic), ncon_(ncon), weightEvaluationFct_(ncon, weightEvaluationFct) {}
+
+   MetisAssignmentFunctor( shared_ptr<InfoCollection>& ic,
+                           const std::vector< std::function<real_t(const BlockInfo&)> > & weightEvaluationFct )
+      : ic_(ic), ncon_(weightEvaluationFct.size()), weightEvaluationFct_(weightEvaluationFct) {}
+
+   void operator()( std::vector< std::pair< const PhantomBlock *, walberla::any > > & blockData, const PhantomBlockForest & phantomBlockForest);
+
+   inline void setWeightEvaluationFct( const std::function<real_t(const BlockInfo &)> & weightEvaluationFct, const uint_t con = 0 ) { weightEvaluationFct_[con] = weightEvaluationFct; }
+   inline void setWeightEvaluationFcts( const std::vector< std::function<real_t(const BlockInfo &)> > & weightEvaluationFct) { weightEvaluationFct_ = weightEvaluationFct; }
+
+   uint_t getNcon() const { return ncon_; }
+
+   inline void   setBlockBaseWeight( const real_t blockBaseWeight ){ blockBaseWeight_ = blockBaseWeight; }
+   inline real_t getBlockBaseWeight() const { return blockBaseWeight_; }
+
+private:
+
+   shared_ptr<InfoCollection> ic_;
+   uint_t ncon_;
+   std::vector< std::function<real_t(const BlockInfo&)> > weightEvaluationFct_;
+   real_t blockBaseWeight_ = real_t(1);
+};
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
+
diff --git a/src/pe_coupling/amr/weight_assignment/WeightAssignmentFunctor.cpp b/src/pe_coupling/amr/weight_assignment/WeightAssignmentFunctor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5fc297aa37e3af631ff392b4c9f065186996e30b
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/WeightAssignmentFunctor.cpp
@@ -0,0 +1,46 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WeightAssignmentFunctor.cpp
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "WeightAssignmentFunctor.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+void WeightAssignmentFunctor::operator()( std::vector< std::pair< const PhantomBlock *, walberla::any > > & blockData, const PhantomBlockForest & )
+{
+   for (auto &it : blockData) {
+      const PhantomBlock * block = it.first;
+      //only change of one level is supported!
+      WALBERLA_ASSERT_LESS( std::abs(int_c(block->getLevel()) - int_c(block->getSourceLevel())), 2 );
+
+      BlockInfo blockInfo;
+      pe_coupling::getBlockInfoFromInfoCollection(block, ic_, blockInfo);
+
+      real_t blockWeight = std::max(weightEvaluationFct_(blockInfo), blockBaseWeight_);
+
+      it.second = PhantomBlockWeight( double_c( blockWeight ) );
+
+   }
+}
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/weight_assignment/WeightAssignmentFunctor.h b/src/pe_coupling/amr/weight_assignment/WeightAssignmentFunctor.h
new file mode 100644
index 0000000000000000000000000000000000000000..5fa776362e591d01a5286eb7922b179875bfbe06
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/WeightAssignmentFunctor.h
@@ -0,0 +1,58 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WeightAssignmentFunctor.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "pe_coupling/amr/BlockInfo.h"
+#include "pe_coupling/amr/InfoCollection.h"
+
+#include "blockforest/loadbalancing/PODPhantomData.h"
+
+#include <functional>
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+class WeightAssignmentFunctor
+{
+public:
+   typedef walberla::blockforest::PODPhantomWeight<double> PhantomBlockWeight;
+
+   WeightAssignmentFunctor( const shared_ptr<InfoCollection>& ic,
+                            const std::function<real_t(const BlockInfo&)> & weightEvaluationFct ) :
+         ic_(ic), weightEvaluationFct_(weightEvaluationFct) {}
+
+   void operator()( std::vector< std::pair< const PhantomBlock *, walberla::any > > & blockData, const PhantomBlockForest & );
+
+   inline void setWeightEvaluationFct( const std::function<real_t(const BlockInfo &)> & weightEvaluationFct ) { weightEvaluationFct_ = weightEvaluationFct;}
+
+   inline void   setBlockBaseWeight( const real_t blockBaseWeight ){blockBaseWeight_ = blockBaseWeight;}
+   inline real_t getBlockBaseWeight() const { return blockBaseWeight_;}
+
+private:
+   shared_ptr<InfoCollection> ic_;
+   std::function<real_t(const BlockInfo&)> weightEvaluationFct_;
+   real_t blockBaseWeight_ = real_t(1);
+};
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.cpp b/src/pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..81e1fa0c46f4be2dc3698230fc9cf19f084657cc
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.cpp
@@ -0,0 +1,35 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WeightEvaluationFunctions.cpp
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+
+real_t defaultWeightEvaluationFunction(const BlockInfo& blockInfo)
+{
+   return real_c(blockInfo.numberOfCells);
+}
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.h b/src/pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..958151ea39697c6c8f26e6bcc4e5034fa5fc9d5b
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/WeightEvaluationFunctions.h
@@ -0,0 +1,39 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file WeightEvaluationFunctions.h
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "pe_coupling/amr/BlockInfo.h"
+
+namespace walberla {
+namespace pe_coupling {
+namespace amr {
+
+
+/*
+ * Examples of weight evaluation functions, useful for coupled LBM-PE simulations:
+ *  - defaultWeightEvaluationFunction: weight is just the number of cells (thus constant on each block)
+ */
+
+real_t defaultWeightEvaluationFunction(const BlockInfo& blockInfo);
+
+} // namespace amr
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/amr/weight_assignment/all.h b/src/pe_coupling/amr/weight_assignment/all.h
new file mode 100644
index 0000000000000000000000000000000000000000..8f9cf169df8cecc0567c3c89706222b28f3285ab
--- /dev/null
+++ b/src/pe_coupling/amr/weight_assignment/all.h
@@ -0,0 +1,27 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file all.h
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//! \brief Collective header file for module pe_coupling
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "MetisAssignmentFunctor.h"
+#include "WeightAssignmentFunctor.h"
+#include "WeightEvaluationFunctions.h"
diff --git a/tests/mesh/CMakeLists.txt b/tests/mesh/CMakeLists.txt
index 932e96ec634405d4aea068b24bb2129bc2c1a06b..a8ba46bf91600553536f74698437ee66c1869eb0 100644
--- a/tests/mesh/CMakeLists.txt
+++ b/tests/mesh/CMakeLists.txt
@@ -94,4 +94,7 @@ if ( WALBERLA_BUILD_WITH_OPENMESH )
    waLBerla_execute_test( NAME  NumericIntegrationTestSphere COMMAND $<TARGET_FILE:NumericIntegrationTest> sphere.obj )
    waLBerla_execute_test( NAME  NumericIntegrationTestBunny  COMMAND $<TARGET_FILE:NumericIntegrationTest> bunny.obj CONFIGURATIONS Release RelWithDbgInfo )
 
+   waLBerla_compile_test( FILES MeshMarshalling.cpp DEPENDS core mesh )
+   waLBerla_execute_test( NAME  MeshMarshalling )
+
 endif()
diff --git a/tests/mesh/MeshMarshalling.cpp b/tests/mesh/MeshMarshalling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1ba1fcc2125ae300a457d557c83bc2ee08c4a1ff
--- /dev/null
+++ b/tests/mesh/MeshMarshalling.cpp
@@ -0,0 +1,170 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Marshalling.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "core/debug/TestSubsystem.h"
+
+#include "mesh/pe/rigid_body/ConvexPolyhedron.h"
+#include "mesh/pe/rigid_body/ConvexPolyhedronFactory.h"
+#include "mesh/TriangleMeshes.h"
+#include "mesh/QHull.h"
+#include "mesh/pe/Types.h"
+#include "pe/rigidbody/Squirmer.h"
+#include "pe/rigidbody/UnionFactory.h"
+#include "pe/rigidbody/Union.h"
+#include "pe/communication/rigidbody/Squirmer.h"
+#include "pe/communication/DynamicMarshalling.h"
+#include "mesh/pe/communication/ConvexPolyhedron.h"
+#include "pe/rigidbody/SetBodyTypeIDs.h"
+#include "pe/Materials.h"
+
+#include <boost/tuple/tuple.hpp>
+#include <memory>
+namespace walberla {
+using namespace walberla::pe;
+using namespace walberla::pe::communication;
+
+using UnionTypeTuple = boost::tuple<mesh::pe::ConvexPolyhedron>;
+using UnionT = Union<UnionTypeTuple>;
+using UnionID = UnionT *;
+using UnionPtr = std::unique_ptr<UnionT>;
+
+typedef boost::tuple<mesh::pe::ConvexPolyhedron, UnionT> BodyTuple ;
+
+std::vector<Vector3<real_t>> generateOctahedron( const real_t radius)
+{
+
+   std::vector<Vector3<real_t>> okta( 6 );
+   for(size_t i = 0; i < 6; i++){
+      auto &p = okta[i];
+      p[i%3]=(i<3) ? radius: -radius;
+   }
+   return okta;
+}
+
+// Checks two mesh::TriangleMesh for pseudo-equality
+void checkMeshEquals(const mesh::TriangleMesh &m1, const mesh::TriangleMesh &m2){
+	// Very basic checks
+	WALBERLA_CHECK_FLOAT_EQUAL(mesh::computeVolume(m1), mesh::computeVolume(m2));
+	WALBERLA_CHECK_EQUAL(mesh::computeCentroid(m1), mesh::computeCentroid(m2));
+	WALBERLA_CHECK_EQUAL(mesh::computeInertiaTensor(m1), mesh::computeInertiaTensor(m2));
+}
+
+// Checks two convexPolyhedrons for equality
+void checkConvexPolyhedronEquals(const mesh::pe::ConvexPolyhedron &b1, const mesh::pe::ConvexPolyhedron &b2){
+   WALBERLA_CHECK_FLOAT_EQUAL(b1.getPosition(), b2.getPosition());
+   WALBERLA_CHECK_FLOAT_EQUAL(b1.getLinearVel(), b2.getLinearVel());
+   WALBERLA_CHECK_FLOAT_EQUAL(b1.getAngularVel(), b2.getAngularVel());
+   WALBERLA_CHECK_EQUAL(b1.getInertia(), b2.getInertia());
+   WALBERLA_CHECK_EQUAL(b1.getMaterial(), b2.getMaterial());
+   // Check equality of the meshes
+   checkMeshEquals(b1.getMesh(), b2.getMesh());
+   WALBERLA_CHECK_EQUAL(b1.getID(), b2.getID());
+   WALBERLA_CHECK_EQUAL(b1.getSystemID(), b2.getSystemID());
+}
+
+void testConvexPolyhedron()
+{
+   WALBERLA_LOG_INFO_ON_ROOT("*** testConvexPolyhedron ***");
+
+   // Generate mesh
+   shared_ptr< mesh::TriangleMesh > octamesh = make_shared<mesh::TriangleMesh>();
+   mesh::QHull< mesh::TriangleMesh > qhull( generateOctahedron(real_t(1.0)), octamesh );
+   qhull.run();
+   
+   MaterialID iron = Material::find("iron");
+   
+   mesh::pe::ConvexPolyhedron b1(759846, 1234794, Vec3(real_c(1), real_c(2), real_c(3)), Vec3(0,0,0), Quat(), *octamesh, iron, false, true, false);
+   b1.setLinearVel(Vec3(real_c(5.2), real_c(6.3), real_c(7.4)));
+   b1.setAngularVel(Vec3(real_c(1.2), real_c(2.3), real_c(3.4)));
+
+   mpi::SendBuffer sb;
+   MarshalDynamically<BodyTuple>::execute(sb, b1);
+   mpi::RecvBuffer rb(sb);
+
+   auto bPtr = UnmarshalDynamically<BodyTuple>::execute(rb, mesh::pe::ConvexPolyhedron::getStaticTypeID(), math::AABB(Vec3(-100,-100,-100), Vec3(100,100,100)), math::AABB(Vec3(-100,-100,-100), Vec3(100,100,100)));
+   mesh::pe::ConvexPolyhedronID b2 = static_cast<mesh::pe::ConvexPolyhedronID>(bPtr.get());
+   checkConvexPolyhedronEquals(b1, *b2);
+   
+}
+
+void testUnion()
+{
+   
+   WALBERLA_LOG_INFO_ON_ROOT("*** testUnion ***");
+   // Generate mesh
+   shared_ptr< mesh::TriangleMesh > octamesh = make_shared<mesh::TriangleMesh>();
+   mesh::QHull< mesh::TriangleMesh > qhull( generateOctahedron(real_t(1.0)), octamesh );
+   qhull.run();
+   
+   MaterialID iron = Material::find("iron");
+   
+   UnionT u1(159, 423, Vec3(real_c(1), real_c(2), real_c(3)), Vec3(0,0,0), Quat(), false, false, false);
+   u1.add(std::make_unique<mesh::pe::ConvexPolyhedron>(753326, 1267824, Vec3(real_c(2), real_c(2), real_c(3)), Vec3(0,0,0), Quat(), *octamesh, iron, false, true, false));
+   u1.add(std::make_unique<mesh::pe::ConvexPolyhedron>(753246, 1233424, Vec3(real_c(-1), real_c(4), real_c(-2)), Vec3(0,0,0), Quat(), *octamesh, iron, false, true, false));
+   
+   u1.setLinearVel(Vec3(real_c(5.2), real_c(6.3), real_c(7.4)));
+   u1.setAngularVel(Vec3(real_c(1.2), real_c(2.3), real_c(3.4)));
+   
+   mpi::SendBuffer sb;
+   MarshalDynamically<BodyTuple>::execute(sb, u1);
+   mpi::RecvBuffer rb(sb);
+
+   auto uPtr = UnmarshalDynamically<BodyTuple>::execute(rb, UnionT::getStaticTypeID(), math::AABB(Vec3(-100,-100,-100), Vec3(100,100,100)), math::AABB(Vec3(-100,-100,-100), Vec3(100,100,100)));
+   UnionID u2 = static_cast<UnionID>(uPtr.get());
+   WALBERLA_CHECK_NOT_NULLPTR( u2 );
+
+   WALBERLA_CHECK_EQUAL(u1.size(), 2);
+   WALBERLA_CHECK_EQUAL(u1.size(), u2->size());
+   WALBERLA_CHECK_EQUAL(u1.getInertia(), u2->getInertia());
+   WALBERLA_CHECK_EQUAL(u1.getPosition(), u2->getPosition());
+   WALBERLA_CHECK_FLOAT_EQUAL(u1.getLinearVel(), u2->getLinearVel());
+   WALBERLA_CHECK_FLOAT_EQUAL(u1.getAngularVel(), u2->getAngularVel());
+   
+   //getting polyhedrons of first union
+   mesh::pe::ConvexPolyhedronID p11 = static_cast<mesh::pe::ConvexPolyhedronID > (u1.begin().getBodyID());
+   mesh::pe::ConvexPolyhedronID p21 = static_cast<mesh::pe::ConvexPolyhedronID > ((++(u1.begin())).getBodyID());
+   
+   //getting polyhedrons of second union
+   mesh::pe::ConvexPolyhedronID p12 = static_cast<mesh::pe::ConvexPolyhedronID > (u2->begin().getBodyID());
+   mesh::pe::ConvexPolyhedronID p22 = static_cast<mesh::pe::ConvexPolyhedronID > ((++(u2->begin())).getBodyID());
+   
+   checkConvexPolyhedronEquals(*p11, *p12);
+   checkConvexPolyhedronEquals(*p21, *p22);
+
+}
+
+int main( int argc, char** argv )
+{
+   walberla::debug::enterTestMode();
+
+   walberla::MPIManager::instance()->initializeMPI( &argc, &argv );
+
+   SetBodyTypeIDs<BodyTuple>::execute();
+   testConvexPolyhedron();
+   testUnion();
+
+   return EXIT_SUCCESS;
+}
+} // namespace walberla
+
+int main( int argc, char* argv[] )
+{
+  return walberla::main( argc, argv );
+}
diff --git a/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp b/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp
index c87a6e7f8a283c910f768a7f5f5805022b2abeb6..885e9ed5f5e9e075800de8ccaabcd9e7a4d96203 100644
--- a/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/SettlingSphereMEMDynamicRefinement.cpp
@@ -58,6 +58,7 @@
 #include "pe/Types.h"
 #include "pe/synchronization/ClearSynchronization.h"
 
+#include "pe_coupling/amr/all.h"
 #include "pe_coupling/mapping/all.h"
 #include "pe_coupling/momentum_exchange_method/all.h"
 #include "pe_coupling/utility/all.h"
@@ -115,80 +116,6 @@ const FlagUID MO_Flag( "moving obstacle" );
 const FlagUID FormerMO_Flag( "former moving obstacle" );
 
 
-//////////////////////////////////////
-// DYNAMIC REFINEMENT FUNCTIONALITY //
-//////////////////////////////////////
-
-/*
- * Class to determine the minimum level a block can be.
- * For coupled LBM-PE simulations the following rules apply:
- *  - a moving body will always remain on the finest block
- *  - a moving body is not allowed to extend into an area with a coarser block
- *  - if no moving body is present, the level can be as coarse as possible (restricted by the 2:1 rule)
- * Therefore, if a body, local or remote (due to bodies that are larger than a block), is present on any of the
- * neighboring blocks of a certain block, this block's target level is the finest level.
- * This, together with a refinement checking frequency that depends on the maximum translational body velocity,
- * ensures the above given requirements.
- */
-class LevelDeterminator
-{
-public:
-
-   LevelDeterminator( const shared_ptr<pe::InfoCollection> & infoCollection, uint_t finestLevel) :
-         infoCollection_( infoCollection ), finestLevel_( finestLevel)
-   {}
-
-   void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels,
-                    std::vector< const Block * > &, const BlockForest & /*forest*/ )
-   {
-      for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
-      {
-         const uint_t numberOfParticlesInDirectNeighborhood = getNumberOfLocalAndShadowBodiesInNeighborhood(it->first);
-
-         uint_t currentLevelOfBlock = it->first->getLevel();
-
-         uint_t targetLevelOfBlock = currentLevelOfBlock; //keep everything as it is
-         if ( numberOfParticlesInDirectNeighborhood > uint_t(0) )
-         {
-            // set block to finest level if there are bodies nearby
-            targetLevelOfBlock = finestLevel_;
-            //WALBERLA_LOG_DEVEL(currentLevelOfBlock << " -> " << targetLevelOfBlock << " (" << numberOfParticlesInDirectNeighborhood << ")" );
-         }
-         else
-         {
-            // block could coarsen sicne there are no bodies nearby
-            if( currentLevelOfBlock > uint_t(0) )
-               targetLevelOfBlock = currentLevelOfBlock - uint_t(1);
-            //WALBERLA_LOG_DEVEL(currentLevelOfBlock << " -> " << targetLevelOfBlock << " (" << numberOfParticlesInDirectNeighborhood << ")" );
-         }
-         it->second = targetLevelOfBlock;
-      }
-   }
-
-private:
-   uint_t getNumberOfLocalAndShadowBodiesInNeighborhood(const Block * block)
-   {
-      uint_t numBodies = uint_t(0);
-
-      // add bodies of current block
-      const auto infoIt = infoCollection_->find(block->getId());
-      numBodies += infoIt->second.numberOfLocalBodies;
-      numBodies += infoIt->second.numberOfShadowBodies;
-
-      // add bodies of all neighboring blocks
-      for(uint_t i = 0; i < block->getNeighborhoodSize(); ++i)
-      {
-         const BlockID& neighborBlockID = block->getNeighborId(i);
-         const auto infoItNeighbor = infoCollection_->find(neighborBlockID);
-         numBodies += infoItNeighbor->second.numberOfLocalBodies;
-         numBodies += infoItNeighbor->second.numberOfShadowBodies;
-      }
-      return numBodies;
-   }
-
-   shared_ptr<pe::InfoCollection> infoCollection_;
-   uint_t finestLevel_;
-};
 
 /////////////////////
 // BLOCK STRUCTURE //
@@ -655,11 +582,10 @@ int main( int argc, char **argv )
    blockforest.reevaluateMinTargetLevelsAfterForcedRefinement( false );
    blockforest.allowRefreshChangingDepth( false );
 
-   shared_ptr<pe::InfoCollection> peInfoCollection = walberla::make_shared<pe::InfoCollection>();
-
-   LevelDeterminator levelDet( peInfoCollection, finestLevel );
+   shared_ptr<pe_coupling::InfoCollection> couplingInfoCollection = walberla::make_shared<pe_coupling::InfoCollection>();
+   pe_coupling::amr::BodyPresenceLevelDetermination particlePresenceRefinement( couplingInfoCollection, finestLevel );
 
-   blockforest.setRefreshMinTargetLevelDeterminationFunction( levelDet );
+   blockforest.setRefreshMinTargetLevelDeterminationFunction( particlePresenceRefinement );
 
    bool curveHilbert = false;
    bool curveAllGather = true;
@@ -873,7 +799,7 @@ int main( int argc, char **argv )
       if( i % refinementCheckFrequency == 0)
       {
          auto & forest = blocks->getBlockForest();
-         pe::createWithNeighborhood(forest, bodyStorageID, *peInfoCollection);
+         pe_coupling::createWithNeighborhood<BoundaryHandling_T>(forest, boundaryHandlingID, bodyStorageID, ccdID, fcdID, numPeSubCycles, *couplingInfoCollection);
 
          uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
          blocks->refresh();