diff --git a/apps/benchmarks/GranularGas/CMakeLists.txt b/apps/benchmarks/GranularGas/CMakeLists.txt
index 0e214cc4d1770e857fe29235c895010c7908dd73..b017add69e5080df42ac236dd5670ffbe466c020 100644
--- a/apps/benchmarks/GranularGas/CMakeLists.txt
+++ b/apps/benchmarks/GranularGas/CMakeLists.txt
@@ -2,13 +2,25 @@ waLBerla_link_files_to_builddir( *.cfg )
 waLBerla_link_files_to_builddir( *.py )
 
 waLBerla_add_executable ( NAME PE_GranularGas
-   FILES PE_GranularGas.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp
+                          FILES PE_GranularGas.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp
                           DEPENDS blockforest core pe postprocessing sqlite )
 
+waLBerla_add_executable ( NAME PE_LoadBalancing
+                          FILES PE_LoadBalancing.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp
+                          DEPENDS blockforest core pe postprocessing sqlite )
+
+waLBerla_add_executable ( NAME MESA_PD_LoadBalancing
+                          FILES MESA_PD_LoadBalancing.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp sortParticleStorage.cpp CreateParticles.cpp
+                          DEPENDS blockforest core pe mesa_pd postprocessing sqlite vtk )
+
 waLBerla_add_executable ( NAME MESA_PD_GranularGas
-   FILES MESA_PD_GranularGas.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp sortParticleStorage.cpp CreateParticles.cpp
-                          DEPENDS blockforest core pe mesa_pd postprocessing vtk sqlite )
+                          FILES MESA_PD_GranularGas.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp sortParticleStorage.cpp CreateParticles.cpp
+                          DEPENDS blockforest core pe mesa_pd postprocessing sqlite vtk )
 
 waLBerla_add_executable ( NAME MESA_PD_KernelBenchmark
-   FILES MESA_PD_KernelBenchmark.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp sortParticleStorage.cpp CreateParticles.cpp
-                          DEPENDS blockforest core pe mesa_pd postprocessing vtk sqlite )
+                          FILES MESA_PD_KernelBenchmark.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp sortParticleStorage.cpp CreateParticles.cpp
+                          DEPENDS blockforest core pe mesa_pd postprocessing sqlite vtk )
+
+waLBerla_add_executable ( NAME MESA_PD_KernelLoadBalancing
+                          FILES MESA_PD_KernelLoadBalancing.cpp SQLProperties.cpp Parameters.cpp NodeTimings.cpp sortParticleStorage.cpp CreateParticles.cpp
+                          DEPENDS blockforest core pe mesa_pd postprocessing sqlite vtk )
diff --git a/apps/benchmarks/GranularGas/GenerateModule.py b/apps/benchmarks/GranularGas/GenerateModule.py
index f068a6d44837bc5084f558f7264e7813ae2a5032..a6b7696b1337ff87da5bea5f5ffccca155e00d09 100755
--- a/apps/benchmarks/GranularGas/GenerateModule.py
+++ b/apps/benchmarks/GranularGas/GenerateModule.py
@@ -49,6 +49,9 @@ if __name__ == '__main__':
    ps.addProperty("flags",            "walberla::mesa_pd::data::particle_flags::FlagT", defValue="", syncMode="COPY")
    ps.addProperty("nextParticle",     "int",                     defValue="-1",        syncMode="NEVER")
 
+   ps.addInclude("blockforest/BlockForest.h")
+   ps.addProperty("currentBlock",     "blockforest::Block*",     defValue="nullptr",   syncMode="NEVER")
+
    kernels = []
    kernels.append( kernel.DoubleCast(shapes) )
    kernels.append( kernel.ExplicitEuler() )
@@ -74,6 +77,7 @@ if __name__ == '__main__':
    comm.append(mpi.ClearNextNeighborSync())
    comm.append(mpi.ReduceContactHistory())
    comm.append(mpi.ReduceProperty())
+   comm.append(mpi.SyncGhostOwners(ps))
    comm.append(mpi.SyncNextNeighbors(ps))
 
 
diff --git a/apps/benchmarks/GranularGas/GranularGas.cfg b/apps/benchmarks/GranularGas/GranularGas.cfg
index 2c321312ff72d4174d09d9c0d8a5f058f02d39a7..8dea7a979b90cc5776fc1cd2cfd64f79a47080e9 100644
--- a/apps/benchmarks/GranularGas/GranularGas.cfg
+++ b/apps/benchmarks/GranularGas/GranularGas.cfg
@@ -2,11 +2,21 @@ GranularGas
 {
    simulationCorner < 0, 0, 0 >;
    simulationDomain < 40, 40, 40 >;
-   blocks < 2,2,2 >;
-   isPeriodic < 1, 1, 1 >;
-   initialRefinementLevel 0;
-   sorting none;
+   blocks < 1,1,1 >;
+   isPeriodic < 0, 0, 0 >;
+   initialRefinementLevel 1;
+   sorting linear;
 
+   LBAlgorithm Morton;
+   baseWeight 1;
+
+   recalculateBlockLevelsInRefresh 1;
+   reevaluateMinTargetLevelsAfterForcedRefinement 1;
+   allowRefreshChangingDepth 1;
+   regridMin 2000;
+   regridMax 100;
+
+   normal  <1,1,1>;
    radius  0.6;
    spacing 1.0;
    vMax    0.0;
diff --git a/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp b/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
index 54c8eb7f2878d8a6df2f433d56d50401d22c647d..7ff78bd1ae940ad0a3b88e0ae4fdb18e12babdf0 100644
--- a/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
+++ b/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
@@ -36,6 +36,7 @@
 #include <mesa_pd/data/ParticleStorage.h>
 #include <mesa_pd/data/ShapeStorage.h>
 #include <mesa_pd/domain/BlockForestDomain.h>
+#include <mesa_pd/kernel/AssocToBlock.h>
 #include <mesa_pd/kernel/DoubleCast.h>
 #include <mesa_pd/kernel/ExplicitEulerWithShape.h>
 #include <mesa_pd/kernel/InsertParticleIntoLinkedCells.h>
@@ -44,6 +45,7 @@
 #include <mesa_pd/mpi/ContactFilter.h>
 #include <mesa_pd/mpi/ReduceProperty.h>
 #include <mesa_pd/mpi/SyncNextNeighbors.h>
+#include <mesa_pd/mpi/SyncNextNeighborsBlockForest.h>
 
 #include <mesa_pd/mpi/notifications/ForceTorqueNotification.h>
 
@@ -101,7 +103,7 @@ int main( int argc, char ** argv )
       WALBERLA_LOG_INFO_ON_ROOT( "No BlockForest created ... exiting!");
       return EXIT_SUCCESS;
    }
-   domain::BlockForestDomain domain(forest);
+   auto domain = std::make_shared<domain::BlockForestDomain> (forest);
 
    auto simulationDomain = forest->getDomain();
    auto localDomain = forest->begin()->getAABB();
@@ -123,7 +125,7 @@ int main( int argc, char ** argv )
    for (auto& iBlk : *forest)
    {
       for (auto pt : grid_generator::SCGrid(iBlk.getAABB(),
-                                            Vector3<real_t>(params.spacing) * real_c(0.5),
+                                            Vector3<real_t>(params.spacing) * real_c(0.5) + params.shift,
                                             params.spacing))
       {
          WALBERLA_CHECK(iBlk.getAABB().contains(pt));
@@ -158,9 +160,9 @@ int main( int argc, char ** argv )
    WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - END ***");
 
    WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
-   auto vtkDomainOutput = walberla::vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, "vtk_out", "simulation_step" );
+   auto vtkDomainOutput = walberla::vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, params.vtk_out, "simulation_step" );
    auto vtkOutput       = make_shared<mesa_pd::vtk::ParticleVtkOutput>(ps) ;
-   auto vtkWriter       = walberla::vtk::createVTKOutput_PointData(vtkOutput, "Bodies", 1, "vtk", "simulation_step", false, false);
+   auto vtkWriter       = walberla::vtk::createVTKOutput_PointData(vtkOutput, "Bodies", 1, params.vtk_out, "simulation_step", false, false);
    vtkOutput->addOutput<SelectRank>("rank");
    vtkOutput->addOutput<data::SelectParticleOwner>("owner");
    //   vtkDomainOutput->write();
@@ -175,13 +177,15 @@ int main( int argc, char ** argv )
    dem.setDampingT (0, 0, real_t(0));
    dem.setFriction (0, 0, real_t(0));
    collision_detection::AnalyticContactDetection              acd;
+   kernel::AssocToBlock                  assoc(forest);
    kernel::DoubleCast                    double_cast;
    mpi::ContactFilter                    contact_filter;
    mpi::ReduceProperty                   RP;
-   mpi::SyncNextNeighbors                SNN;
+   mpi::SyncNextNeighborsBlockForest     SNN;
 
    // initial sync
-   SNN(*ps, domain);
+   ps->forEachParticle(false, kernel::SelectLocal(), accessor, assoc, accessor);
+   SNN(*ps, forest, domain);
    sortParticleStorage(*ps, params.sorting, lc.domain_, uint_c(lc.numCellsPerDim_[0]));
 //   vtkWriter->write();
 
@@ -211,6 +215,11 @@ int main( int argc, char ** argv )
          //         vtkWriter->write();
          //      }
 
+         tp["AssocToBlock"].start();
+         ps->forEachParticle(false, kernel::SelectLocal(), accessor, assoc, accessor);
+         if (params.bBarrier) WALBERLA_MPI_BARRIER();
+         tp["AssocToBlock"].end();
+
          tp["GenerateLinkedCells"].start();
          lc.clear();
          ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, lc);
@@ -230,7 +239,7 @@ int main( int argc, char ** argv )
             if (double_cast(idx1, idx2, ac, acd, ac ))
             {
                ++contactsDetected;
-               if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), domain))
+               if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
                {
                   ++contactsTreated;
                   dem(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
@@ -253,7 +262,7 @@ int main( int argc, char ** argv )
          tp["Euler"].end();
 
          tp["SNN"].start();
-         SNN(*ps, domain);
+         SNN(*ps, forest, domain);
          if (params.bBarrier) WALBERLA_MPI_BARRIER();
          tp["SNN"].end();
       }
@@ -329,9 +338,9 @@ int main( int argc, char ** argv )
       walberla::mpi::reduceInplace(linkedCellsVolume, walberla::mpi::SUM);
       size_t numLinkedCells = lc.cells_.size();
       walberla::mpi::reduceInplace(numLinkedCells, walberla::mpi::SUM);
-      size_t local_aabbs         = domain.getNumLocalAABBs();
-      size_t neighbor_subdomains = domain.getNumNeighborSubdomains();
-      size_t neighbor_processes  = domain.getNumNeighborProcesses();
+      size_t local_aabbs         = domain->getNumLocalAABBs();
+      size_t neighbor_subdomains = domain->getNumNeighborSubdomains();
+      size_t neighbor_processes  = domain->getNumNeighborProcesses();
       walberla::mpi::reduceInplace(local_aabbs, walberla::mpi::SUM);
       walberla::mpi::reduceInplace(neighbor_subdomains, walberla::mpi::SUM);
       walberla::mpi::reduceInplace(neighbor_processes, walberla::mpi::SUM);
@@ -347,6 +356,11 @@ int main( int argc, char ** argv )
          stringProperties["tag"]                  = "mesa_pd";
          integerProperties["mpi_num_processes"]   = mpiManager->numProcesses();
          integerProperties["omp_max_threads"]     = omp_get_max_threads();
+         realProperties["PUpS"]                   = double_c(PUpS);
+         realProperties["timer_min"]              = timer_reduced->min();
+         realProperties["timer_max"]              = timer_reduced->max();
+         realProperties["timer_average"]          = timer_reduced->average();
+         realProperties["timer_total"]            = timer_reduced->total();
          integerProperties["outerIteration"]      = int64_c(outerIteration);
          integerProperties["num_particles"]       = numParticles;
          integerProperties["num_ghost_particles"] = numGhostParticles;
diff --git a/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp b/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp
index 34ad979126256d5e5bd04e45f646ee5320468d04..4b4d24256b3e0ab6916c155bb7f25bf22cc1fad3 100644
--- a/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp
+++ b/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp
@@ -128,7 +128,7 @@ int main( int argc, char ** argv )
    for (auto& iBlk : *forest)
    {
       for (auto pt : grid_generator::SCGrid(iBlk.getAABB(),
-                                            Vector3<real_t>(params.spacing) * real_c(0.5),
+                                            Vector3<real_t>(params.spacing) * real_c(0.5) + params.shift,
                                             params.spacing))
       {
          WALBERLA_CHECK(iBlk.getAABB().contains(pt));
@@ -163,9 +163,9 @@ int main( int argc, char ** argv )
    WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - END ***");
 
    WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
-   auto vtkDomainOutput = walberla::vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, "vtk_out", "simulation_step" );
+   auto vtkDomainOutput = walberla::vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, params.vtk_out, "simulation_step" );
    auto vtkOutput       = make_shared<mesa_pd::vtk::ParticleVtkOutput>(ps) ;
-   auto vtkWriter       = walberla::vtk::createVTKOutput_PointData(vtkOutput, "Bodies", 1, "vtk", "simulation_step", false, false);
+   auto vtkWriter       = walberla::vtk::createVTKOutput_PointData(vtkOutput, "Bodies", 1, params.vtk_out, "simulation_step", false, false);
    vtkOutput->addOutput<SelectRank>("rank");
    vtkOutput->addOutput<data::SelectParticleOwner>("owner");
    vtkOutput->addOutput<SelectIdx>("idx");
diff --git a/apps/benchmarks/GranularGas/MESA_PD_KernelLoadBalancing.cpp b/apps/benchmarks/GranularGas/MESA_PD_KernelLoadBalancing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..33c97b2b931821ca2a2dfeffed0be236b6ad17a2
--- /dev/null
+++ b/apps/benchmarks/GranularGas/MESA_PD_KernelLoadBalancing.cpp
@@ -0,0 +1,639 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   MESA_PD_KernelLoadBalancing.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "Accessor.h"
+#include "check.h"
+#include "Contact.h"
+#include "CreateParticles.h"
+#include "NodeTimings.h"
+#include "Parameters.h"
+#include "SelectProperty.h"
+#include "sortParticleStorage.h"
+#include "SQLProperties.h"
+
+#include <mesa_pd/vtk/ParticleVtkOutput.h>
+
+#include <mesa_pd/collision_detection/AnalyticContactDetection.h>
+#include <mesa_pd/data/LinkedCells.h>
+#include <mesa_pd/data/ParticleAccessor.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/data/ShapeStorage.h>
+#include <mesa_pd/data/SparseLinkedCells.h>
+#include <mesa_pd/domain/BlockForestDataHandling.h>
+#include <mesa_pd/domain/BlockForestDomain.h>
+#include <mesa_pd/domain/InfoCollection.h>
+#include <mesa_pd/kernel/AssocToBlock.h>
+#include <mesa_pd/kernel/DoubleCast.h>
+#include <mesa_pd/kernel/ExplicitEulerWithShape.h>
+#include <mesa_pd/kernel/InsertParticleIntoLinkedCells.h>
+#include <mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.h>
+#include <mesa_pd/kernel/ParticleSelector.h>
+#include <mesa_pd/kernel/SpringDashpot.h>
+#include <mesa_pd/mpi/ContactFilter.h>
+#include <mesa_pd/mpi/ReduceProperty.h>
+#include <mesa_pd/mpi/SyncNextNeighbors.h>
+#include <mesa_pd/mpi/SyncNextNeighborsBlockForest.h>
+#include <mesa_pd/mpi/notifications/ForceTorqueNotification.h>
+#include <mesa_pd/sorting/HilbertCompareFunctor.h>
+#include <mesa_pd/sorting/LinearizedCompareFunctor.h>
+
+#include <blockforest/BlockForest.h>
+#include <blockforest/Initialization.h>
+#include <blockforest/loadbalancing/DynamicCurve.h>
+#include <blockforest/loadbalancing/DynamicParMetis.h>
+#include <blockforest/loadbalancing/PODPhantomData.h>
+#include <core/Abort.h>
+#include <core/Environment.h>
+#include <core/Hostname.h>
+#include <core/math/Random.h>
+#include <core/mpi/Gatherv.h>
+#include <core/mpi/RecvBuffer.h>
+#include <core/mpi/Reduce.h>
+#include <core/mpi/SendBuffer.h>
+#include <core/grid_generator/SCIterator.h>
+#include <core/logging/Logging.h>
+#include <core/OpenMP.h>
+#include <core/timing/Timer.h>
+#include <core/timing/TimingPool.h>
+#include <core/waLBerlaBuildInfo.h>
+#include <pe/amr/level_determination/MinMaxLevelDetermination.h>
+#include <pe/amr/weight_assignment/MetisAssignmentFunctor.h>
+#include <pe/amr/weight_assignment/WeightAssignmentFunctor.h>
+#include <sqlite/SQLite.h>
+#include <vtk/VTKOutput.h>
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <type_traits>
+
+namespace walberla {
+namespace mesa_pd {
+
+int main( int argc, char ** argv )
+{
+   using namespace walberla::timing;
+
+   Environment env(argc, argv);
+   auto mpiManager = walberla::mpi::MPIManager::instance();
+   mpiManager->useWorldComm();
+
+   WALBERLA_LOG_DEVEL_ON_ROOT("MESA_PD_KernelLoadBalancing" );
+
+   //   logging::Logging::instance()->setStreamLogLevel(logging::Logging::INFO);
+   //   logging::Logging::instance()->setFileLogLevel(logging::Logging::INFO);
+
+   WALBERLA_LOG_INFO_ON_ROOT( "config file: " << argv[1] );
+   WALBERLA_LOG_INFO_ON_ROOT( "waLBerla Revision: " << WALBERLA_GIT_SHA1 );
+
+   math::seedRandomGenerator( static_cast<unsigned int>(1337 * mpiManager->worldRank()) );
+
+   std::map< std::string, walberla::int64_t > integerProperties;
+   std::map< std::string, double >            realProperties;
+   std::map< std::string, std::string >       stringProperties;
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** READING CONFIG FILE ***");
+   auto cfg = env.config();
+   if (cfg == nullptr) WALBERLA_ABORT("No config specified!");
+   const Config::BlockHandle mainConf  = cfg->getBlock( "GranularGas" );
+   Parameters params;
+   loadFromConfig(params, mainConf);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** BLOCKFOREST ***");
+   // create the block forest from the "GranularGas" configuration block
+   auto forest = blockforest::createBlockForestFromConfig( mainConf );
+   if (!forest)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT( "No BlockForest created ... exiting!");
+      return EXIT_SUCCESS;
+   }
+
+   forest->recalculateBlockLevelsInRefresh( params.recalculateBlockLevelsInRefresh );
+   forest->alwaysRebalanceInRefresh( params.alwaysRebalanceInRefresh );
+   forest->reevaluateMinTargetLevelsAfterForcedRefinement( params.reevaluateMinTargetLevelsAfterForcedRefinement );
+   forest->allowRefreshChangingDepth( params.allowRefreshChangingDepth );
+
+   forest->allowMultipleRefreshCycles( params.allowMultipleRefreshCycles );
+   forest->checkForEarlyOutInRefresh( params.checkForEarlyOutInRefresh );
+   forest->checkForLateOutInRefresh( params.checkForLateOutInRefresh );
+
+   auto ic = make_shared<pe::InfoCollection>();
+
+   pe::amr::MinMaxLevelDetermination regrid(ic, params.regridMin, params.regridMax);
+   forest->setRefreshMinTargetLevelDeterminationFunction( regrid );
+
+   bool bRebalance = true;
+   if (params.LBAlgorithm == "None")
+   {
+      bRebalance = false;
+   } else if (params.LBAlgorithm == "Morton")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto prepFunc = blockforest::DynamicCurveBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( false, true, false );
+      prepFunc.setMaxBlocksPerProcess( params.maxBlocksPerProcess );
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Hilbert")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto prepFunc = blockforest::DynamicCurveBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( true, true, false );
+      prepFunc.setMaxBlocksPerProcess( params.maxBlocksPerProcess );
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Metis")
+   {
+      auto assFunc = pe::amr::MetisAssignmentFunctor( ic, params.baseWeight );
+      forest->setRefreshPhantomBlockDataAssignmentFunction( assFunc );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto alg     = blockforest::DynamicParMetis::stringToAlgorithm(    params.metisAlgorithm );
+      auto vWeight = blockforest::DynamicParMetis::stringToWeightsToUse( params.metisWeightsToUse );
+      auto eWeight = blockforest::DynamicParMetis::stringToEdgeSource(   params.metisEdgeSource );
+
+      auto prepFunc = blockforest::DynamicParMetis( alg, vWeight, eWeight );
+      prepFunc.setipc2redist(params.metisipc2redist);
+      addParMetisPropertiesToSQL(prepFunc, integerProperties, realProperties, stringProperties);
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Diffusive")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      auto prepFunc = blockforest::DynamicDiffusionBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( 1, 1, false );
+      //configure(cfg, prepFunc);
+      //addDynamicDiffusivePropertiesToSQL(prepFunc, integerProperties, realProperties, stringProperties);
+      forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+   } else
+   {
+      WALBERLA_ABORT("Unknown LBAlgorithm: " << params.LBAlgorithm);
+   }
+
+   auto domain = std::make_shared<domain::BlockForestDomain>(forest);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - START ***");
+
+   // init data structures: particle storage, shape storage, accessor and linked cells
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   auto ss = std::make_shared<data::ShapeStorage>();
+   ParticleAccessorWithShape accessor(ps, ss);
+   auto lc = std::make_shared<data::LinkedCells>(domain->getUnionOfLocalAABBs().getExtended(params.spacing), params.spacing );
+   forest->addBlockData(domain::createBlockForestDataHandling(ps), "Storage");
+
+   auto center = forest->getDomain().center();
+   auto  smallSphere = ss->create<data::Sphere>( params.radius );
+   ss->shapes[smallSphere]->updateMassAndInertia(real_t(2707));
+   for (auto& iBlk : *forest)
+   {
+      for (auto pt : grid_generator::SCGrid(iBlk.getAABB(),
+                                            Vector3<real_t>(params.spacing) * real_c(0.5) + params.shift,
+                                            params.spacing))
+      {
+         WALBERLA_CHECK(iBlk.getAABB().contains(pt));
+         auto tmp = dot( (pt - center), params.normal );
+         if (tmp < 0)
+         {
+            createSphere(*ps, pt, params.radius, smallSphere);
+         }
+      }
+   }
+   int64_t numParticles = int64_c(ps->size());
+   walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
+   WALBERLA_LOG_INFO_ON_ROOT("#particles created: " << numParticles);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - END ***");
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
+   auto vtkDomainOutput = walberla::vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, params.vtk_out, "simulation_step" );
+   auto vtkOutput       = make_shared<mesa_pd::vtk::ParticleVtkOutput>(ps) ;
+   auto vtkWriter       = walberla::vtk::createVTKOutput_PointData(vtkOutput, "Bodies", 1, params.vtk_out, "simulation_step", false, false);
+   vtkOutput->addOutput<SelectRank>("rank");
+   vtkOutput->addOutput<data::SelectParticleOwner>("owner");
+   vtkOutput->addOutput<SelectIdx>("idx");
+   //   vtkDomainOutput->write();
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - START ***");
+   // Init kernels
+   kernel::ExplicitEulerWithShape        explicitEulerWithShape( params.dt );
+   kernel::InsertParticleIntoLinkedCells ipilc;
+   kernel::SpringDashpot                 dem(1);
+   dem.setStiffness(0, 0, real_t(0));
+   dem.setDampingN (0, 0, real_t(0));
+   dem.setDampingT (0, 0, real_t(0));
+   dem.setFriction (0, 0, real_t(0));
+   collision_detection::AnalyticContactDetection              acd;
+   kernel::AssocToBlock                  assoc(forest);
+   kernel::DoubleCast                    double_cast;
+   mpi::ContactFilter                    contact_filter;
+   mpi::ReduceProperty                   RP;
+   mpi::SyncNextNeighborsBlockForest     SNN;
+   std::vector<Contact>                  contacts;
+   contacts.reserve(4000000);
+
+   // initial sync: run AssocToBlock on local particles, then SyncNextNeighborsBlockForest
+   ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+   SNN(*ps, forest, domain);
+   sortParticleStorage(*ps, params.sorting, lc->domain_, uint_c(lc->numCellsPerDim_[0]));
+   //   vtkWriter->write();
+
+   WcTimingPool tpImbalanced;
+   WcTimingPool tpBalanced;
+   WcTimer      timerLoadBalancing;
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** RUNNING UNBALANCED SIMULATION ***");
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["AssocToBlock"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+   }
+   tpImbalanced["AssocToBlock"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["GenerateLinkedCells"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      lc->clear();
+      ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
+   }
+   tpImbalanced["GenerateLinkedCells"].end();
+
+   int64_t imbalancedContactsChecked  = 0;
+   int64_t imbalancedContactsDetected = 0;
+   int64_t imbalancedContactsTreated  = 0;
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["ContactDetection"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      contacts.clear();
+      imbalancedContactsChecked  = 0;
+      imbalancedContactsDetected = 0;
+      imbalancedContactsTreated  = 0;
+      lc->forEachParticlePairHalf(true,
+                                  kernel::SelectAll(),
+                                  accessor,
+                                  [&](const size_t idx1, const size_t idx2, auto& ac)
+      {
+         ++imbalancedContactsChecked;
+         if (double_cast(idx1, idx2, ac, acd, ac ))
+         {
+            ++imbalancedContactsDetected;
+            if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
+            {
+               ++imbalancedContactsTreated;
+               contacts.emplace_back(acd.getIdx1(), acd.getIdx2(), acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
+            }
+         }
+      },
+      accessor );
+   }
+   tpImbalanced["ContactDetection"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["DEM"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      for (auto& c : contacts)
+      {
+         dem(c.idx1_, c.idx2_, accessor, c.contactPoint_, c.contactNormal_, c.penetrationDepth_);
+      }
+   }
+   tpImbalanced["DEM"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["ReduceForce"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      RP.operator()<ForceTorqueNotification>(*ps);
+   }
+   tpImbalanced["ReduceForce"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["Euler"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
+   }
+   tpImbalanced["Euler"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpImbalanced["SNN"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      SNN(*ps, forest, domain);
+   }
+   tpImbalanced["SNN"].end();
+
+   auto SNNBytesSent     = SNN.getBytesSent();
+   auto SNNBytesReceived = SNN.getBytesReceived();
+   auto SNNSends         = SNN.getNumberOfSends();
+   auto SNNReceives      = SNN.getNumberOfReceives();
+   auto RPBytesSent      = RP.getBytesSent();
+   auto RPBytesReceived  = RP.getBytesReceived();
+   auto RPSends          = RP.getNumberOfSends();
+   auto RPReceives       = RP.getNumberOfReceives();
+   walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
+   auto cC = walberla::mpi::reduce(imbalancedContactsChecked, walberla::mpi::SUM);
+   auto cD = walberla::mpi::reduce(imbalancedContactsDetected, walberla::mpi::SUM);
+   auto cT = walberla::mpi::reduce(imbalancedContactsTreated, walberla::mpi::SUM);
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
+   auto minLinkedCells = walberla::mpi::reduce(lc->cells_.size(), walberla::mpi::MIN);
+   auto maxLinkedCells = walberla::mpi::reduce(lc->cells_.size(), walberla::mpi::MAX);
+   WALBERLA_LOG_DEVEL_ON_ROOT( "linked cells: " << minLinkedCells << " / " << maxLinkedCells );
+
+   vtkDomainOutput->write( );
+   vtkWriter->write();
+   WALBERLA_MPI_BARRIER();
+   timerLoadBalancing.start();
+   if (bRebalance)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("*** RUNNING LOAD BALANCING ***");
+      domain::createWithNeighborhood( accessor, *forest, *ic );
+      for (auto pIt = ps->begin(); pIt != ps->end(); )
+      {
+         using namespace walberla::mesa_pd::data::particle_flags;
+         if (isSet(pIt->getFlags(), GHOST))
+         {
+            pIt = ps->erase(pIt);
+         } else
+         {
+            pIt->getGhostOwnersRef().clear();
+            ++pIt;
+         }
+      }
+      forest->refresh();
+      domain->refresh();
+      lc = std::make_shared<data::LinkedCells>(domain->getUnionOfLocalAABBs().getExtended(params.spacing), params.spacing );
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+      SNN(*ps, forest, domain);
+      sortParticleStorage(*ps, params.sorting, lc->domain_, uint_c(lc->numCellsPerDim_[0]));
+   }
+   timerLoadBalancing.end();
+   vtkDomainOutput->write( );
+   vtkWriter->write();
+
+   WALBERLA_MPI_BARRIER();
+   WALBERLA_LOG_INFO_ON_ROOT("*** RUNNING BALANCED SIMULATION ***");
+
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["AssocToBlock"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+   }
+   tpBalanced["AssocToBlock"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["GenerateLinkedCells"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      lc->clear();
+      ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
+   }
+   tpBalanced["GenerateLinkedCells"].end();
+
+   int64_t balancedContactsChecked  = 0;
+   int64_t balancedContactsDetected = 0;
+   int64_t balancedContactsTreated  = 0;
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["ContactDetection"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      contacts.clear();
+      balancedContactsChecked  = 0;
+      balancedContactsDetected = 0;
+      balancedContactsTreated  = 0;
+      lc->forEachParticlePairHalf(true,
+                                  kernel::SelectAll(),
+                                  accessor,
+                                  [&](const size_t idx1, const size_t idx2, auto& ac)
+      {
+         ++balancedContactsChecked;
+         if (double_cast(idx1, idx2, ac, acd, ac ))
+         {
+            ++balancedContactsDetected;
+            if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
+            {
+               ++balancedContactsTreated;
+               contacts.emplace_back(acd.getIdx1(), acd.getIdx2(), acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
+            }
+         }
+      },
+      accessor );
+   }
+   tpBalanced["ContactDetection"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["DEM"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      for (auto& c : contacts)
+      {
+         dem(c.idx1_, c.idx2_, accessor, c.contactPoint_, c.contactNormal_, c.penetrationDepth_);
+      }
+   }
+   tpBalanced["DEM"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["ReduceForce"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      RP.operator()<ForceTorqueNotification>(*ps);
+   }
+   tpBalanced["ReduceForce"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["Euler"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
+   }
+   tpBalanced["Euler"].end();
+
+   WALBERLA_MPI_BARRIER();
+   tpBalanced["SNN"].start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      SNN(*ps, forest, domain);
+   }
+   tpBalanced["SNN"].end();
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
+
+   if (params.checkSimulation)
+   {
+      check(*ps, *forest, params.spacing);
+   }
+
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - START ***");
+
+   SNNBytesSent     = SNN.getBytesSent();
+   SNNBytesReceived = SNN.getBytesReceived();
+   SNNSends         = SNN.getNumberOfSends();
+   SNNReceives      = SNN.getNumberOfReceives();
+   RPBytesSent      = RP.getBytesSent();
+   RPBytesReceived  = RP.getBytesReceived();
+   RPSends          = RP.getNumberOfSends();
+   RPReceives       = RP.getNumberOfReceives();
+   walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
+   cC = walberla::mpi::reduce(balancedContactsChecked, walberla::mpi::SUM);
+   cD = walberla::mpi::reduce(balancedContactsDetected, walberla::mpi::SUM);
+   cT = walberla::mpi::reduce(balancedContactsTreated, walberla::mpi::SUM);
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
+   minLinkedCells = walberla::mpi::reduce(lc->cells_.size(), walberla::mpi::MIN);
+   maxLinkedCells = walberla::mpi::reduce(lc->cells_.size(), walberla::mpi::MAX);
+   WALBERLA_LOG_DEVEL_ON_ROOT( "linked cells: " << minLinkedCells << " / " << maxLinkedCells );
+
+   auto tpImbalancedReduced = tpImbalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpImbalancedReduced);
+
+   auto tpBalancedReduced = tpBalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpBalancedReduced);
+
+   auto timerLoadBalancingReduced = walberla::timing::getReduced(timerLoadBalancing, REDUCE_TOTAL, 0);
+
+   numParticles = 0;
+   int64_t numGhostParticles = 0;
+   ps->forEachParticle(false,
+                       kernel::SelectAll(),
+                       accessor,
+                       [&numParticles, &numGhostParticles](const size_t idx, auto& ac)
+   {
+      if (data::particle_flags::isSet( ac.getFlagsRef(idx), data::particle_flags::GHOST))
+      {
+         ++numGhostParticles;
+      } else
+      {
+         ++numParticles;
+      }
+   },
+   accessor);
+   auto minParticles = walberla::mpi::reduce(numParticles, walberla::mpi::MIN);
+   auto maxParticles = walberla::mpi::reduce(numParticles, walberla::mpi::MAX);
+   WALBERLA_LOG_DEVEL_ON_ROOT("particle ratio: " << minParticles << " / " << maxParticles);
+   walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(numGhostParticles, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(imbalancedContactsChecked, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(imbalancedContactsDetected, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(imbalancedContactsTreated, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(balancedContactsChecked, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(balancedContactsDetected, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(balancedContactsTreated, walberla::mpi::SUM);
+   double linkedCellsVolume = lc->domain_.volume();
+   walberla::mpi::reduceInplace(linkedCellsVolume, walberla::mpi::SUM);
+   size_t numLinkedCells = lc->cells_.size();
+   walberla::mpi::reduceInplace(numLinkedCells, walberla::mpi::SUM);
+   size_t local_aabbs         = domain->getNumLocalAABBs();
+   size_t neighbor_subdomains = domain->getNumNeighborSubdomains();
+   size_t neighbor_processes  = domain->getNumNeighborProcesses();
+   walberla::mpi::reduceInplace(local_aabbs, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(neighbor_subdomains, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(neighbor_processes, walberla::mpi::SUM);
+
+   uint_t runId = uint_c(-1);
+   WALBERLA_ROOT_SECTION()
+   {
+      stringProperties["walberla_git"]                  = WALBERLA_GIT_SHA1;
+      stringProperties["tag"]                           = "mesa_pd";
+      integerProperties["mpi_num_processes"]            = mpiManager->numProcesses();
+      integerProperties["omp_max_threads"]              = omp_get_max_threads();
+      integerProperties["num_particles"]                = numParticles;
+      integerProperties["num_ghost_particles"]          = numGhostParticles;
+      integerProperties["minParticles"]                 = minParticles;
+      integerProperties["maxParticles"]                 = maxParticles;
+      integerProperties["imbalancedContactsChecked"]    = imbalancedContactsChecked;
+      integerProperties["imbalancedContactsDetected"]   = imbalancedContactsDetected;
+      integerProperties["imbalancedContactsTreated"]    = imbalancedContactsTreated;
+      integerProperties["balancedContactsChecked"]      = balancedContactsChecked;
+      integerProperties["balancedContactsDetected"]     = balancedContactsDetected;
+      integerProperties["balancedContactsTreated"]      = balancedContactsTreated;
+      realProperties["loadbalancing_timer_min"]         = timerLoadBalancingReduced->min();
+      realProperties["loadbalancing_timer_max"]         = timerLoadBalancingReduced->max();
+      realProperties["loadbalancing_timer_average"]     = timerLoadBalancingReduced->average();
+      realProperties["loadbalancing_timer_total"]       = timerLoadBalancingReduced->total();
+      integerProperties["local_aabbs"]                  = int64_c(local_aabbs);
+      integerProperties["neighbor_subdomains"]          = int64_c(neighbor_subdomains);
+      integerProperties["neighbor_processes"]           = int64_c(neighbor_processes);
+      integerProperties["SNNBytesSent"]                 = SNNBytesSent;
+      integerProperties["SNNBytesReceived"]             = SNNBytesReceived;
+      integerProperties["SNNSends"]                     = SNNSends;
+      integerProperties["SNNReceives"]                  = SNNReceives;
+      integerProperties["RPBytesSent"]                  = RPBytesSent;
+      integerProperties["RPBytesReceived"]              = RPBytesReceived;
+      integerProperties["RPSends"]                      = RPSends;
+      integerProperties["RPReceives"]                   = RPReceives;
+      realProperties["linkedCellsVolume"]               = linkedCellsVolume;
+      integerProperties["numLinkedCells"]               = int64_c(numLinkedCells);
+      integerProperties["minLinkedCells"]               = int64_c(minLinkedCells);
+      integerProperties["maxLinkedCells"]               = int64_c(maxLinkedCells);
+
+      addBuildInfoToSQL( integerProperties, realProperties, stringProperties );
+      saveToSQL(params, integerProperties, realProperties, stringProperties );
+      addDomainPropertiesToSQL(*forest, integerProperties, realProperties, stringProperties);
+      addSlurmPropertiesToSQL(integerProperties, realProperties, stringProperties);
+
+      runId = sqlite::storeRunInSqliteDB( params.sqlFile, integerProperties, stringProperties, realProperties );
+      sqlite::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpImbalancedReduced, "imbalanced" );
+      sqlite::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpBalancedReduced, "balanced" );
+   }
+   if (params.storeNodeTimings)
+   {
+      storeNodeTimings(runId, params.sqlFile, "NodeTimingImbalanced", tpImbalanced);
+      storeNodeTimings(runId, params.sqlFile, "NodeTimingBalanced", tpBalanced);
+   }
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
+
+   return EXIT_SUCCESS;
+}
+
+} // namespace mesa_pd
+} // namespace walberla
+
+int main( int argc, char* argv[] ) // global program entry point; the benchmark logic lives in walberla::mesa_pd::main
+{
+   return walberla::mesa_pd::main( argc, argv ); // pass the command line through unchanged and return the driver's result as exit status
+}
diff --git a/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp b/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..856edfc886c0fa2588919325148f825495a4d319
--- /dev/null
+++ b/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp
@@ -0,0 +1,598 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   MESA_PD_LoadBalancing.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "Accessor.h"
+#include "check.h"
+#include "Contact.h"
+#include "CreateParticles.h"
+#include "NodeTimings.h"
+#include "Parameters.h"
+#include "SelectProperty.h"
+#include "sortParticleStorage.h"
+#include "SQLProperties.h"
+
+#include <mesa_pd/vtk/ParticleVtkOutput.h>
+
+#include <mesa_pd/collision_detection/AnalyticContactDetection.h>
+#include <mesa_pd/data/LinkedCells.h>
+#include <mesa_pd/data/ParticleAccessor.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/data/ShapeStorage.h>
+#include <mesa_pd/data/STLOverloads.h>
+#include <mesa_pd/domain/BlockForestDataHandling.h>
+#include <mesa_pd/domain/BlockForestDomain.h>
+#include <mesa_pd/domain/InfoCollection.h>
+#include <mesa_pd/kernel/AssocToBlock.h>
+#include <mesa_pd/kernel/DoubleCast.h>
+#include <mesa_pd/kernel/ExplicitEulerWithShape.h>
+#include <mesa_pd/kernel/InsertParticleIntoLinkedCells.h>
+#include <mesa_pd/kernel/ParticleSelector.h>
+#include <mesa_pd/kernel/SpringDashpot.h>
+#include <mesa_pd/mpi/ContactFilter.h>
+#include <mesa_pd/mpi/ReduceProperty.h>
+#include <mesa_pd/mpi/SyncNextNeighbors.h>
+#include <mesa_pd/mpi/SyncNextNeighborsBlockForest.h>
+
+#include <mesa_pd/mpi/notifications/ForceTorqueNotification.h>
+
+#include <blockforest/BlockForest.h>
+#include <blockforest/Initialization.h>
+#include <blockforest/loadbalancing/DynamicCurve.h>
+#include <blockforest/loadbalancing/DynamicParMetis.h>
+#include <blockforest/loadbalancing/PODPhantomData.h>
+#include <core/Abort.h>
+#include <core/Environment.h>
+#include <core/math/Random.h>
+#include <core/mpi/Reduce.h>
+#include <core/mpi/MPITextFile.h>
+#include <core/grid_generator/SCIterator.h>
+#include <core/logging/Logging.h>
+#include <core/OpenMP.h>
+#include <core/timing/Timer.h>
+#include <core/waLBerlaBuildInfo.h>
+#include <pe/amr/level_determination/MinMaxLevelDetermination.h>
+#include <pe/amr/weight_assignment/MetisAssignmentFunctor.h>
+#include <pe/amr/weight_assignment/WeightAssignmentFunctor.h>
+#include <sqlite/SQLite.h>
+#include <vtk/VTKOutput.h>
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <type_traits>
+
+namespace walberla {
+namespace mesa_pd {
+
+int main( int argc, char ** argv )
+{
+   using namespace walberla::timing;
+
+   Environment env(argc, argv);
+   auto mpiManager = walberla::mpi::MPIManager::instance();
+   mpiManager->useWorldComm();
+
+   WALBERLA_LOG_DEVEL_ON_ROOT("MESA_PD_LoadBalancing" );
+
+   //   logging::Logging::instance()->setStreamLogLevel(logging::Logging::INFO);
+   //   logging::Logging::instance()->includeLoggingToFile("LoadBalancing");
+   //   logging::Logging::instance()->setFileLogLevel(logging::Logging::DETAIL);
+
+   WALBERLA_LOG_INFO_ON_ROOT( "config file: " << argv[1] );
+   WALBERLA_LOG_INFO_ON_ROOT( "waLBerla Revision: " << WALBERLA_GIT_SHA1 );
+
+   math::seedRandomGenerator( static_cast<unsigned int>(1337 * mpiManager->worldRank()) );
+
+   std::map< std::string, walberla::int64_t > integerProperties;
+   std::map< std::string, double >            realProperties;
+   std::map< std::string, std::string >       stringProperties;
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** READING CONFIG FILE ***");
+   auto cfg = env.config();
+   if (cfg == nullptr) WALBERLA_ABORT("No config specified!");
+   const Config::BlockHandle mainConf  = cfg->getBlock( "GranularGas" );
+
+   Parameters params;
+   loadFromConfig(params, mainConf);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** BLOCKFOREST ***");
+   // create forest
+   auto forest = blockforest::createBlockForestFromConfig( mainConf );
+   if (!forest)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT( "No BlockForest created ... exiting!");
+      return EXIT_SUCCESS;
+   }
+
+   forest->recalculateBlockLevelsInRefresh( params.recalculateBlockLevelsInRefresh );
+   forest->alwaysRebalanceInRefresh( params.alwaysRebalanceInRefresh );
+   forest->reevaluateMinTargetLevelsAfterForcedRefinement( params.reevaluateMinTargetLevelsAfterForcedRefinement );
+   forest->allowRefreshChangingDepth( params.allowRefreshChangingDepth );
+
+   forest->allowMultipleRefreshCycles( params.allowMultipleRefreshCycles );
+   forest->checkForEarlyOutInRefresh( params.checkForEarlyOutInRefresh );
+   forest->checkForLateOutInRefresh( params.checkForLateOutInRefresh );
+
+   auto ic = make_shared<pe::InfoCollection>();
+
+   pe::amr::MinMaxLevelDetermination regrid(ic, params.regridMin, params.regridMax);
+   forest->setRefreshMinTargetLevelDeterminationFunction( regrid );
+
+   bool bRebalance = true;
+   if (params.LBAlgorithm == "None")
+   {
+      bRebalance = false;
+   } else if (params.LBAlgorithm == "Morton")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto prepFunc = blockforest::DynamicCurveBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( false, true, false );
+      prepFunc.setMaxBlocksPerProcess( params.maxBlocksPerProcess );
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Hilbert")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto prepFunc = blockforest::DynamicCurveBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( true, true, false );
+      prepFunc.setMaxBlocksPerProcess( params.maxBlocksPerProcess );
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Metis")
+   {
+      auto assFunc = pe::amr::MetisAssignmentFunctor( ic, params.baseWeight );
+      forest->setRefreshPhantomBlockDataAssignmentFunction( assFunc );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto alg     = blockforest::DynamicParMetis::stringToAlgorithm(    params.metisAlgorithm );
+      auto vWeight = blockforest::DynamicParMetis::stringToWeightsToUse( params.metisWeightsToUse );
+      auto eWeight = blockforest::DynamicParMetis::stringToEdgeSource(   params.metisEdgeSource );
+
+      auto prepFunc = blockforest::DynamicParMetis( alg, vWeight, eWeight );
+      prepFunc.setipc2redist(params.metisipc2redist);
+      addParMetisPropertiesToSQL(prepFunc, integerProperties, realProperties, stringProperties);
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Diffusive")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      auto prepFunc = blockforest::DynamicDiffusionBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( 1, 1, false );
+      //configure(cfg, prepFunc);
+      //addDynamicDiffusivePropertiesToSQL(prepFunc, integerProperties, realProperties, stringProperties);
+      forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+   } else
+   {
+      WALBERLA_ABORT("Unknown LBAlgorithm: " << params.LBAlgorithm);
+   }
+
+   auto domain = std::make_shared<domain::BlockForestDomain>(forest);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - START ***");
+
+   //init data structures
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   auto ss = std::make_shared<data::ShapeStorage>();
+   ParticleAccessorWithShape accessor(ps, ss);
+   auto lc = std::make_shared<data::LinkedCells>(domain->getUnionOfLocalAABBs().getExtended(params.spacing), params.spacing );
+   forest->addBlockData(domain::createBlockForestDataHandling(ps), "Storage");
+
+   auto center = forest->getDomain().center();
+   auto  smallSphere = ss->create<data::Sphere>( params.radius );
+   ss->shapes[smallSphere]->updateMassAndInertia(real_t(2707));
+   for (auto& iBlk : *forest)
+   {
+      for (auto pt : grid_generator::SCGrid(iBlk.getAABB(),
+                                            Vector3<real_t>(params.spacing) * real_c(0.5) + params.shift,
+                                            params.spacing))
+      {
+         WALBERLA_CHECK(iBlk.getAABB().contains(pt));
+         auto tmp = dot( (pt - center), params.normal );
+         if (tmp < 0)
+         {
+            createSphere(*ps, pt, params.radius, smallSphere);
+         }
+      }
+   }
+   int64_t numParticles = int64_c(ps->size());
+   walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
+   WALBERLA_LOG_INFO_ON_ROOT("#particles created: " << numParticles);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - END ***");
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
+   auto vtkDomainOutput = walberla::vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, params.vtk_out, "simulation_step" );
+   auto vtkOutput       = make_shared<mesa_pd::vtk::ParticleVtkOutput>(ps) ;
+   auto vtkWriter       = walberla::vtk::createVTKOutput_PointData(vtkOutput, "Bodies", 1, params.vtk_out, "simulation_step", false, false);
+   vtkOutput->addOutput<SelectRank>("rank");
+   vtkOutput->addOutput<data::SelectParticleOwner>("owner");
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - START ***");
+   // Init kernels
+   kernel::ExplicitEulerWithShape        explicitEulerWithShape( params.dt );
+   kernel::InsertParticleIntoLinkedCells ipilc;
+   kernel::SpringDashpot                 dem(1);
+   dem.setStiffness(0, 0, real_t(0));
+   dem.setDampingN (0, 0, real_t(0));
+   dem.setDampingT (0, 0, real_t(0));
+   dem.setFriction (0, 0, real_t(0));
+   collision_detection::AnalyticContactDetection              acd;
+   kernel::AssocToBlock                  assoc(forest);
+   kernel::DoubleCast                    double_cast;
+   mpi::ContactFilter                    contact_filter;
+   mpi::ReduceProperty                   RP;
+   mpi::SyncNextNeighborsBlockForest     SNN;
+
+   // initial sync
+   ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+   SNN(*ps, forest, domain);
+   sortParticleStorage(*ps, params.sorting, lc->domain_, uint_c(lc->numCellsPerDim_[0]));
+
+   WcTimer      timerImbalanced;
+   WcTimer      timerLoadBalancing;
+   WcTimer      timerBalanced;
+   WcTimingPool tpImbalanced;
+   WcTimingPool tpBalanced;
+
+   auto    SNNBytesSent     = SNN.getBytesSent();
+   auto    SNNBytesReceived = SNN.getBytesReceived();
+   auto    SNNSends         = SNN.getNumberOfSends();
+   auto    SNNReceives      = SNN.getNumberOfReceives();
+   auto    RPBytesSent      = RP.getBytesSent();
+   auto    RPBytesReceived  = RP.getBytesReceived();
+   auto    RPSends          = RP.getNumberOfSends();
+   auto    RPReceives       = RP.getNumberOfReceives();
+   int64_t imbalancedContactsChecked  = 0;
+   int64_t imbalancedContactsDetected = 0;
+   int64_t imbalancedContactsTreated  = 0;
+
+   WALBERLA_MPI_BARRIER();
+   WALBERLA_LOG_DEVEL_ON_ROOT("running imbalanced simulation");
+   timerImbalanced.start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      //WALBERLA_LOG_DEVEL_ON_ROOT("timestep: " << i << " / " << params.simulationSteps );
+      //         if (i % params.visSpacing == 0)
+      //         {
+      //            vtkWriter->write();
+      //         }
+
+      tpImbalanced["AssocToBlock"].start();
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["AssocToBlock"].end();
+
+      tpImbalanced["GenerateLinkedCells"].start();
+      lc->clear();
+      ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["GenerateLinkedCells"].end();
+
+      tpImbalanced["DEM"].start();
+      imbalancedContactsChecked  = 0;
+      imbalancedContactsDetected = 0;
+      imbalancedContactsTreated  = 0;
+      lc->forEachParticlePairHalf(true,
+                                  kernel::SelectAll(),
+                                  accessor,
+                                  [&](const size_t idx1, const size_t idx2, auto& ac)
+      {
+         ++imbalancedContactsChecked;
+         if (double_cast(idx1, idx2, ac, acd, ac ))
+         {
+            ++imbalancedContactsDetected;
+            if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
+            {
+               ++imbalancedContactsTreated;
+               dem(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
+            }
+         }
+      },
+      accessor );
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["DEM"].end();
+
+      tpImbalanced["ReduceForce"].start();
+      RP.operator()<ForceTorqueNotification>(*ps);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["ReduceForce"].end();
+
+      tpImbalanced["Euler"].start();
+      //ps->forEachParticle(false, [&](const size_t idx){WALBERLA_CHECK_EQUAL(ps->getForce(idx), Vec3(0,0,0), *(*ps)[idx] << "\n" << idx);});
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["Euler"].end();
+
+      tpImbalanced["SNN"].start();
+      SNN(*ps, forest, domain);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["SNN"].end();
+   }
+   timerImbalanced.end();
+
+   vtkDomainOutput->write( );
+   vtkWriter->write();
+   WALBERLA_MPI_BARRIER();
+   timerLoadBalancing.start();
+   if (bRebalance)
+   {
+      WALBERLA_LOG_DEVEL_ON_ROOT("running load balancing");
+      domain::createWithNeighborhood( accessor, *forest, *ic );
+      for (auto pIt = ps->begin(); pIt != ps->end(); )
+      {
+         using namespace walberla::mesa_pd::data::particle_flags;
+         if (isSet(pIt->getFlags(), GHOST))
+         {
+            pIt = ps->erase(pIt);
+         } else
+         {
+            pIt->getGhostOwnersRef().clear();
+            ++pIt;
+         }
+      }
+      forest->refresh();
+      domain->refresh();
+      lc = std::make_shared<data::LinkedCells>(domain->getUnionOfLocalAABBs().getExtended(params.spacing), params.spacing );
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+      SNN(*ps, forest, domain);
+      sortParticleStorage(*ps, params.sorting, lc->domain_, uint_c(lc->numCellsPerDim_[0]));
+   }
+   timerLoadBalancing.end();
+   vtkDomainOutput->write( );
+   vtkWriter->write();
+
+   WALBERLA_MPI_BARRIER();
+   WALBERLA_LOG_DEVEL_ON_ROOT("running balanced simulation");
+   int64_t balancedContactsChecked  = 0;
+   int64_t balancedContactsDetected = 0;
+   int64_t balancedContactsTreated  = 0;
+   timerBalanced.start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      //WALBERLA_LOG_DEVEL_ON_ROOT("timestep: " << i << " / " << params.simulationSteps );
+      //         if (i % params.visSpacing == 0)
+      //         {
+      //            vtkWriter->write();
+      //         }
+
+      tpBalanced["AssocToBlock"].start();
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["AssocToBlock"].end();
+
+      tpBalanced["GenerateLinkedCells"].start();
+      lc->clear();
+      ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["GenerateLinkedCells"].end();
+
+      tpBalanced["DEM"].start();
+      balancedContactsChecked  = 0;
+      balancedContactsDetected = 0;
+      balancedContactsTreated  = 0;
+      lc->forEachParticlePairHalf(true,
+                                  kernel::SelectAll(),
+                                  accessor,
+                                  [&](const size_t idx1, const size_t idx2, auto& ac)
+      {
+         ++balancedContactsChecked;
+         if (double_cast(idx1, idx2, ac, acd, ac ))
+         {
+            ++balancedContactsDetected;
+            if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
+            {
+               ++balancedContactsTreated;
+               dem(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
+            }
+         }
+      },
+      accessor );
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["DEM"].end();
+
+      tpBalanced["ReduceForce"].start();
+      RP.operator()<ForceTorqueNotification>(*ps);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["ReduceForce"].end();
+
+      tpBalanced["Euler"].start();
+      //ps->forEachParticle(false, [&](const size_t idx){WALBERLA_CHECK_EQUAL(ps->getForce(idx), Vec3(0,0,0), *(*ps)[idx] << "\n" << idx);});
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["Euler"].end();
+
+      tpBalanced["SNN"].start();
+      SNN(*ps, forest, domain);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["SNN"].end();
+   }
+   timerBalanced.end();
+
+   SNNBytesSent     = SNN.getBytesSent();
+   SNNBytesReceived = SNN.getBytesReceived();
+   SNNSends         = SNN.getNumberOfSends();
+   SNNReceives      = SNN.getNumberOfReceives();
+   RPBytesSent      = RP.getBytesSent();
+   RPBytesReceived  = RP.getBytesReceived();
+   RPSends          = RP.getNumberOfSends();
+   RPReceives       = RP.getNumberOfReceives();
+   walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
+   auto cC = walberla::mpi::reduce(balancedContactsChecked, walberla::mpi::SUM);
+   auto cD = walberla::mpi::reduce(balancedContactsDetected, walberla::mpi::SUM);
+   auto cT = walberla::mpi::reduce(balancedContactsTreated, walberla::mpi::SUM);
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
+
+   auto timerImbalancedReduced = walberla::timing::getReduced(timerImbalanced, REDUCE_TOTAL, 0);
+   double PUpSImbalanced = 0.0;
+   WALBERLA_ROOT_SECTION()
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("IMBALANCED " << *timerImbalancedReduced);
+      PUpSImbalanced = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timerImbalancedReduced->max());
+      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpSImbalanced);
+   }
+
+   auto timerBalancedReduced = walberla::timing::getReduced(timerBalanced, REDUCE_TOTAL, 0);
+   double PUpSBalanced = 0.0;
+   WALBERLA_ROOT_SECTION()
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("BALANCED " << *timerBalancedReduced);
+      PUpSBalanced = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timerBalancedReduced->max());
+      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpSBalanced);
+   }
+
+   auto timerLoadBalancingReduced = walberla::timing::getReduced(timerLoadBalancing, REDUCE_TOTAL, 0);
+
+   auto tpImbalancedReduced = tpImbalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpImbalancedReduced);
+
+   auto tpBalancedReduced = tpBalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpBalancedReduced);
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
+
+   if (params.checkSimulation)
+   {
+      check(*ps, *forest, params.spacing);
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - START ***");
+   numParticles = 0;
+   int64_t numGhostParticles = 0;
+   ps->forEachParticle(false,
+                       kernel::SelectAll(),
+                       accessor,
+                       [&numParticles, &numGhostParticles](const size_t idx, auto& ac)
+   {
+      if (data::particle_flags::isSet( ac.getFlagsRef(idx), data::particle_flags::GHOST))
+      {
+         ++numGhostParticles;
+      } else
+      {
+         ++numParticles;
+      }
+   },
+   accessor);
+   auto minParticles = walberla::mpi::reduce(numParticles, walberla::mpi::MIN);
+   auto maxParticles = walberla::mpi::reduce(numParticles, walberla::mpi::MAX);
+   WALBERLA_LOG_DEVEL_ON_ROOT("particle ratio: " << minParticles << " / " << maxParticles);
+   walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(numGhostParticles, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(imbalancedContactsChecked, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(imbalancedContactsDetected, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(imbalancedContactsTreated, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(balancedContactsChecked, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(balancedContactsDetected, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(balancedContactsTreated, walberla::mpi::SUM);
+   double linkedCellsVolume = lc->domain_.volume();
+   walberla::mpi::reduceInplace(linkedCellsVolume, walberla::mpi::SUM);
+   size_t numLinkedCells = lc->cells_.size();
+   walberla::mpi::reduceInplace(numLinkedCells, walberla::mpi::SUM);
+   size_t local_aabbs         = domain->getNumLocalAABBs();
+   size_t neighbor_subdomains = domain->getNumNeighborSubdomains();
+   size_t neighbor_processes  = domain->getNumNeighborProcesses();
+   walberla::mpi::reduceInplace(local_aabbs, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(neighbor_subdomains, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(neighbor_processes, walberla::mpi::SUM);
+
+   uint_t runId = uint_c(-1);
+   WALBERLA_ROOT_SECTION()
+   {
+      stringProperties["walberla_git"]                  = WALBERLA_GIT_SHA1;
+      stringProperties["tag"]                           = "mesa_pd";
+      integerProperties["mpi_num_processes"]            = mpiManager->numProcesses();
+      integerProperties["omp_max_threads"]              = omp_get_max_threads();
+      realProperties["imbalanced_PUpS"]                 = double_c(PUpSImbalanced);
+      realProperties["imbalanced_timer_min"]            = timerImbalancedReduced->min();
+      realProperties["imbalanced_timer_max"]            = timerImbalancedReduced->max();
+      realProperties["imbalanced_timer_average"]        = timerImbalancedReduced->average();
+      realProperties["imbalanced_timer_total"]          = timerImbalancedReduced->total();
+      realProperties["loadbalancing_timer_min"]         = timerLoadBalancingReduced->min();
+      realProperties["loadbalancing_timer_max"]         = timerLoadBalancingReduced->max();
+      realProperties["loadbalancing_timer_average"]     = timerLoadBalancingReduced->average();
+      realProperties["loadbalancing_timer_total"]       = timerLoadBalancingReduced->total();
+      realProperties["balanced_PUpS"]                   = double_c(PUpSBalanced);
+      realProperties["balanced_timer_min"]              = timerBalancedReduced->min();
+      realProperties["balanced_timer_max"]              = timerBalancedReduced->max();
+      realProperties["balanced_timer_average"]          = timerBalancedReduced->average();
+      realProperties["balanced_timer_total"]            = timerBalancedReduced->total();
+      integerProperties["num_particles"]                = numParticles;
+      integerProperties["num_ghost_particles"]          = numGhostParticles;
+      integerProperties["minParticles"]                 = minParticles;
+      integerProperties["maxParticles"]                 = maxParticles;
+      integerProperties["imbalancedContactsChecked"]    = imbalancedContactsChecked;
+      integerProperties["imbalancedContactsDetected"]   = imbalancedContactsDetected;
+      integerProperties["imbalancedContactsTreated"]    = imbalancedContactsTreated;
+      integerProperties["balancedContactsChecked"]      = balancedContactsChecked;
+      integerProperties["balancedContactsDetected"]     = balancedContactsDetected;
+      integerProperties["balancedContactsTreated"]      = balancedContactsTreated;
+      integerProperties["local_aabbs"]                  = int64_c(local_aabbs);
+      integerProperties["neighbor_subdomains"]          = int64_c(neighbor_subdomains);
+      integerProperties["neighbor_processes"]           = int64_c(neighbor_processes);
+      integerProperties["SNNBytesSent"]                 = SNNBytesSent;
+      integerProperties["SNNBytesReceived"]             = SNNBytesReceived;
+      integerProperties["SNNSends"]                     = SNNSends;
+      integerProperties["SNNReceives"]                  = SNNReceives;
+      integerProperties["RPBytesSent"]                  = RPBytesSent;
+      integerProperties["RPBytesReceived"]              = RPBytesReceived;
+      integerProperties["RPSends"]                      = RPSends;
+      integerProperties["RPReceives"]                   = RPReceives;
+      realProperties["linkedCellsVolume"]               = linkedCellsVolume;
+      integerProperties["numLinkedCells"]               = int64_c(numLinkedCells);
+
+      addBuildInfoToSQL( integerProperties, realProperties, stringProperties );
+      saveToSQL(params, integerProperties, realProperties, stringProperties );
+      addDomainPropertiesToSQL(*forest, integerProperties, realProperties, stringProperties);
+      addSlurmPropertiesToSQL(integerProperties, realProperties, stringProperties);
+
+      runId = sqlite::storeRunInSqliteDB( params.sqlFile, integerProperties, stringProperties, realProperties );
+      sqlite::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpImbalancedReduced, "imbalanced" );
+      sqlite::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpImbalancedReduced, "balanced" );
+   }
+
+   if (params.storeNodeTimings)
+   {
+      storeNodeTimings(runId, params.sqlFile, "NodeTimingImbalanced", tpImbalanced);
+      storeNodeTimings(runId, params.sqlFile, "NodeTimingBalanced", tpBalanced);
+   }
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
+
+   return EXIT_SUCCESS;
+}
+
+} // namespace mesa_pd
+} // namespace walberla
+
+int main( int argc, char** argv ) // process entry point
+{
+   return ::walberla::mesa_pd::main( argc, argv ); // forward the arguments and propagate the exit code
+}
diff --git a/apps/benchmarks/GranularGas/PE_GranularGas.cpp b/apps/benchmarks/GranularGas/PE_GranularGas.cpp
index ea95b79cd81f042ca07517471c647ac6e1a8983b..dd75b61287aa8195495ab4d2e7160f3830674aca 100644
--- a/apps/benchmarks/GranularGas/PE_GranularGas.cpp
+++ b/apps/benchmarks/GranularGas/PE_GranularGas.cpp
@@ -90,7 +90,7 @@ int main( int argc, char ** argv )
    auto cfg = env.config();
    if (cfg == nullptr) WALBERLA_ABORT("No config specified!");
    const Config::BlockHandle mainConf  = cfg->getBlock( "GranularGas" );
-   Parameters params;
+   mesa_pd::Parameters params;
    loadFromConfig(params, mainConf);
 
    WALBERLA_LOG_INFO_ON_ROOT("*** GLOBALBODYSTORAGE ***");
@@ -174,9 +174,9 @@ int main( int argc, char ** argv )
    }
 
    WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
-   auto vtkDomainOutput = vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, "vtk_out", "simulation_step" );
+   auto vtkDomainOutput = vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, params.vtk_out, "simulation_step" );
    auto vtkSphereHelper = make_shared<SphereVtkOutput>(storageID, *forest) ;
-   auto vtkSphereOutput = vtk::createVTKOutput_PointData(vtkSphereHelper, "Bodies", 1, "vtk_out", "simulation_step", false, false);
+   auto vtkSphereOutput = vtk::createVTKOutput_PointData(vtkSphereHelper, "Bodies", 1, params.vtk_out, "simulation_step", false, false);
 
    WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - START ***");
    //const real_t   static_cof  ( real_c(0.1) / 2 );   // Coefficient of static friction. Note: pe doubles the input coefficient of friction for material-material contacts.
@@ -190,7 +190,7 @@ int main( int argc, char ** argv )
    for (auto& currentBlock : *forest)
    {
       for (auto it = grid_generator::SCIterator(currentBlock.getAABB().getIntersection(generationDomain),
-                                                Vector3<real_t>(params.spacing) * real_c(0.5),
+                                                Vector3<real_t>(params.spacing) * real_c(0.5) + params.shift,
                                                 params.spacing);
            it != grid_generator::SCIterator();
            ++it)
diff --git a/apps/benchmarks/GranularGas/PE_LoadBalancing.cpp b/apps/benchmarks/GranularGas/PE_LoadBalancing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..42eb4181a00245a17f6c3397390fa036e1c80ade
--- /dev/null
+++ b/apps/benchmarks/GranularGas/PE_LoadBalancing.cpp
@@ -0,0 +1,473 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   PE_LoadBalancing.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "NodeTimings.h"
+#include "Parameters.h"
+#include "SQLProperties.h"
+
+#include <pe/amr/InfoCollection.h>
+#include <pe/amr/level_determination/MinMaxLevelDetermination.h>
+#include <pe/amr/weight_assignment/MetisAssignmentFunctor.h>
+#include <pe/amr/weight_assignment/WeightAssignmentFunctor.h>
+#include <pe/basic.h>
+#include <pe/synchronization/ClearSynchronization.h>
+#include <pe/vtk/SphereVtkOutput.h>
+
+#include <blockforest/Initialization.h>
+#include <blockforest/loadbalancing/DynamicCurve.h>
+#include <blockforest/loadbalancing/DynamicParMetis.h>
+#include <blockforest/loadbalancing/PODPhantomData.h>
+#include <core/Abort.h>
+#include <core/Environment.h>
+#include <core/math/Random.h>
+#include <core/grid_generator/SCIterator.h>
+#include <core/logging/Logging.h>
+#include <core/OpenMP.h>
+#include <core/timing/TimingTree.h>
+#include <core/waLBerlaBuildInfo.h>
+#include <sqlite/SQLite.h>
+#include <vtk/VTKOutput.h>
+
+#include <functional>
+#include <memory>
+#include <tuple>
+
+namespace walberla {
+using namespace walberla::pe;
+using namespace walberla::timing;
+
+using BodyTuple = std::tuple<Sphere, Plane> ;
+
+int main( int argc, char ** argv )
+{
+   WcTimingTree tt; // handed to the collision-response solver and the sync calls below
+   Environment env(argc, argv);
+   // Phases: setup, imbalanced run, optional rebalance (skipped for LBAlgorithm "None"), balanced run, SQLite output.
+   logging::Logging::instance()->setStreamLogLevel(logging::Logging::INFO);
+   logging::Logging::instance()->setFileLogLevel(logging::Logging::INFO);
+   // argv[1] only exists when an argument was passed; streaming a null char* would be undefined behaviour.
+   WALBERLA_LOG_INFO_ON_ROOT( "config file: " << (argc > 1 ? argv[1] : "<none>") );
+   WALBERLA_LOG_INFO_ON_ROOT( "waLBerla Revision: " << WALBERLA_GIT_SHA1 );
+
+   math::seedRandomGenerator( static_cast<unsigned int>(1337 * mpi::MPIManager::instance()->worldRank()) );
+
+   std::map< std::string, walberla::int64_t > integerProperties;
+   std::map< std::string, double >            realProperties;
+   std::map< std::string, std::string >       stringProperties;
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** READING COMMANDLINE ARGUMENTS ***");
+   bool bDEM = false;
+   bool bHCSITS = false;
+
+   bool bNN = false;
+   bool bSO = false;
+
+   bool bInelasticFrictionlessContact = false;
+   bool bApproximateInelasticCoulombContactByDecoupling = false;
+   bool bInelasticCoulombContactByDecoupling = false;
+   bool bInelasticGeneralizedMaximumDissipationContact = false;
+   // Every argument is compared against all known switches; unrecognized arguments are ignored here.
+   for( int i = 1; i < argc; ++i )
+   {
+      if( std::strcmp( argv[i], "--DEM" ) == 0 ) bDEM = true;
+      if( std::strcmp( argv[i], "--HCSITS" ) == 0 ) bHCSITS = true;
+
+      if( std::strcmp( argv[i], "--syncNextNeighbor" ) == 0 ) bNN = true;
+      if( std::strcmp( argv[i], "--syncShadowOwners" ) == 0 ) bSO = true;
+
+      if( std::strcmp( argv[i], "--InelasticFrictionlessContact" ) == 0 ) bInelasticFrictionlessContact = true;
+      if( std::strcmp( argv[i], "--ApproximateInelasticCoulombContactByDecoupling" ) == 0 ) bApproximateInelasticCoulombContactByDecoupling = true;
+      if( std::strcmp( argv[i], "--InelasticCoulombContactByDecoupling" ) == 0 ) bInelasticCoulombContactByDecoupling = true;
+      if( std::strcmp( argv[i], "--InelasticGeneralizedMaximumDissipationContact" ) == 0 ) bInelasticGeneralizedMaximumDissipationContact = true;
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** READING CONFIG FILE ***");
+   auto cfg = env.config();
+   if (cfg == nullptr) WALBERLA_ABORT("No config specified!");
+   const Config::BlockHandle mainConf  = cfg->getBlock( "GranularGas" );
+   mesa_pd::Parameters params;
+   loadFromConfig(params, mainConf);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** GLOBALBODYSTORAGE ***");
+   shared_ptr<BodyStorage> globalBodyStorage = make_shared<BodyStorage>();
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** BLOCKFOREST ***");
+   // create forest
+   shared_ptr< BlockForest > forest = blockforest::createBlockForestFromConfig( mainConf );
+   if (!forest)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT( "No BlockForest created ... exiting!");
+      return EXIT_SUCCESS;
+   }
+
+   forest->recalculateBlockLevelsInRefresh( params.recalculateBlockLevelsInRefresh );
+   forest->alwaysRebalanceInRefresh( params.alwaysRebalanceInRefresh );
+   forest->reevaluateMinTargetLevelsAfterForcedRefinement( params.reevaluateMinTargetLevelsAfterForcedRefinement );
+   forest->allowRefreshChangingDepth( params.allowRefreshChangingDepth );
+
+   forest->allowMultipleRefreshCycles( params.allowMultipleRefreshCycles );
+   forest->checkForEarlyOutInRefresh( params.checkForEarlyOutInRefresh );
+   forest->checkForLateOutInRefresh( params.checkForLateOutInRefresh );
+
+   WALBERLA_LOG_INFO_ON_ROOT("simulationDomain: " << forest->getDomain());
+
+   WALBERLA_LOG_INFO_ON_ROOT("blocks: " << Vector3<uint_t>(forest->getXSize(), forest->getYSize(), forest->getZSize()) );
+
+   auto ic = make_shared<pe::InfoCollection>();
+
+   pe::amr::MinMaxLevelDetermination regrid(ic, params.regridMin, params.regridMax);
+   forest->setRefreshMinTargetLevelDeterminationFunction( regrid );
+
+   bool bRebalance = true;
+   if (params.LBAlgorithm == "None")
+   {
+      bRebalance = false;
+   } else if (params.LBAlgorithm == "Morton")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto prepFunc = blockforest::DynamicCurveBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( false, true, false );
+      prepFunc.setMaxBlocksPerProcess( params.maxBlocksPerProcess );
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Hilbert")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto prepFunc = blockforest::DynamicCurveBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( true, true, false );
+      prepFunc.setMaxBlocksPerProcess( params.maxBlocksPerProcess );
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Metis")
+   {
+      auto assFunc = pe::amr::MetisAssignmentFunctor( ic, params.baseWeight );
+      forest->setRefreshPhantomBlockDataAssignmentFunction( assFunc );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::MetisAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+
+      auto alg     = blockforest::DynamicParMetis::stringToAlgorithm(    params.metisAlgorithm );
+      auto vWeight = blockforest::DynamicParMetis::stringToWeightsToUse( params.metisWeightsToUse );
+      auto eWeight = blockforest::DynamicParMetis::stringToEdgeSource(   params.metisEdgeSource );
+
+      auto prepFunc = blockforest::DynamicParMetis( alg, vWeight, eWeight );
+      prepFunc.setipc2redist(params.metisipc2redist);
+      mesa_pd::addParMetisPropertiesToSQL(prepFunc, integerProperties, realProperties, stringProperties);
+      forest->setRefreshPhantomBlockMigrationPreparationFunction( prepFunc );
+   } else if (params.LBAlgorithm == "Diffusive")
+   {
+      forest->setRefreshPhantomBlockDataAssignmentFunction( pe::amr::WeightAssignmentFunctor( ic, params.baseWeight ) );
+      forest->setRefreshPhantomBlockDataPackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      forest->setRefreshPhantomBlockDataUnpackFunction( pe::amr::WeightAssignmentFunctor::PhantomBlockWeightPackUnpackFunctor() );
+      auto prepFunc = blockforest::DynamicDiffusionBalance< pe::amr::WeightAssignmentFunctor::PhantomBlockWeight >( 1, 1, false );
+      //configure(cfg, prepFunc);
+      //addDynamicDiffusivePropertiesToSQL(prepFunc, integerProperties, realProperties, stringProperties);
+      forest->setRefreshPhantomBlockMigrationPreparationFunction(prepFunc);
+   } else
+   {
+      WALBERLA_ABORT("Unknown LBAlgorithm: " << params.LBAlgorithm);
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** BODYTUPLE ***");
+   // initialize body type ids
+   SetBodyTypeIDs<BodyTuple>::execute();
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** STORAGEDATAHANDLING ***");
+   // add block data
+   auto storageID           = forest->addBlockData(createStorageDataHandling<BodyTuple>(), "Storage");
+   auto ccdID               = forest->addBlockData(ccd::createHashGridsDataHandling( globalBodyStorage, storageID ), "CCD");
+   auto fcdID               = forest->addBlockData(fcd::createGenericFCDDataHandling<BodyTuple, fcd::AnalyticCollideFunctor>(), "FCD");
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** INTEGRATOR ***");
+   std::unique_ptr<cr::ICR> cr;
+   if (bDEM)
+   {
+      cr = std::make_unique<cr::DEM>(globalBodyStorage, forest, storageID, ccdID, fcdID, &tt);
+      WALBERLA_LOG_INFO_ON_ROOT("Using DEM!");
+   } else if (bHCSITS)
+   {
+      cr = std::make_unique<cr::HCSITS>(globalBodyStorage, forest, storageID, ccdID, fcdID, &tt);
+      configure(mainConf, *static_cast<cr::HCSITS*>(cr.get()));
+      WALBERLA_LOG_INFO_ON_ROOT("Using HCSITS!");
+
+      cr::HCSITS* hcsits = static_cast<cr::HCSITS*>(cr.get());
+
+      if (bInelasticFrictionlessContact)
+      {
+         hcsits->setRelaxationModel(cr::HCSITS::InelasticFrictionlessContact);
+         WALBERLA_LOG_INFO_ON_ROOT("Using InelasticFrictionlessContact!");
+      } else if (bApproximateInelasticCoulombContactByDecoupling)
+      {
+         hcsits->setRelaxationModel(cr::HCSITS::ApproximateInelasticCoulombContactByDecoupling);
+         WALBERLA_LOG_INFO_ON_ROOT("Using ApproximateInelasticCoulombContactByDecoupling!");
+      } else if (bInelasticCoulombContactByDecoupling)
+      {
+         hcsits->setRelaxationModel(cr::HCSITS::InelasticCoulombContactByDecoupling);
+         WALBERLA_LOG_INFO_ON_ROOT("Using InelasticCoulombContactByDecoupling!");
+      } else if (bInelasticGeneralizedMaximumDissipationContact)
+      {
+         hcsits->setRelaxationModel(cr::HCSITS::InelasticGeneralizedMaximumDissipationContact);
+         WALBERLA_LOG_INFO_ON_ROOT("Using InelasticGeneralizedMaximumDissipationContact!");
+      } else
+      {
+         WALBERLA_ABORT("Friction model could not be determined!");
+      }
+   } else
+   {
+      WALBERLA_ABORT("Model could not be determined!");
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SYNCCALL ***");
+   std::function<void(void)> syncCallWithoutTT;
+   if (bNN)
+   {
+      syncCallWithoutTT = std::bind( pe::syncNextNeighbors<BodyTuple>, std::ref(*forest), storageID, &tt, real_c(0.1), false );
+      WALBERLA_LOG_INFO_ON_ROOT("Using NextNeighbor sync!");
+   } else if (bSO)
+   {
+      syncCallWithoutTT = std::bind( pe::syncShadowOwners<BodyTuple>, std::ref(*forest), storageID, &tt, real_c(0.1), false );
+      WALBERLA_LOG_INFO_ON_ROOT("Using ShadowOwner sync!");
+   } else
+   {
+      WALBERLA_ABORT("Synchronization method could not be determined!");
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
+   auto vtkDomainOutput = vtk::createVTKOutput_DomainDecomposition( forest, "domain_decomposition", 1, params.vtk_out, "simulation_step" );
+   auto vtkSphereHelper = make_shared<SphereVtkOutput>(storageID, *forest) ;
+   auto vtkSphereOutput = vtk::createVTKOutput_PointData(vtkSphereHelper, "Bodies", 1, params.vtk_out, "simulation_step", false, false);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - START ***");
+   //const real_t   static_cof  ( real_c(0.1) / 2 );   // Coefficient of static friction. Note: pe doubles the input coefficient of friction for material-material contacts.
+   //const real_t   dynamic_cof ( static_cof ); // Coefficient of dynamic friction. Similar to static friction for low speed friction.
+   MaterialID     material = createMaterial( "granular", real_t( 1.0 ), 0, 0, 0, real_t( 0.5 ), 1, real_t(1e-6), 0, 0 );
+
+   auto simulationDomain = forest->getDomain();
+   const auto& generationDomain = simulationDomain; // simulationDomain.getExtended(-real_c(0.5) * spacing);
+   int64_t numParticles = 0;
+
+   auto center = forest->getDomain().center();
+   for (auto& currentBlock : *forest)
+   {
+      for (auto it = grid_generator::SCIterator(currentBlock.getAABB().getIntersection(generationDomain),
+                                                Vector3<real_t>(params.spacing) * real_c(0.5) + params.shift,
+                                                params.spacing);
+           it != grid_generator::SCIterator();
+           ++it)
+      {
+         auto tmp = dot( (*it - center), params.normal ); // only grid points with a negative projection onto params.normal get a sphere
+         if (tmp < 0)
+         {
+            SphereID sp = pe::createSphere( *globalBodyStorage, *forest, storageID, 0, *it, params.radius, material);
+            if (sp != nullptr) ++numParticles;
+         }
+      }
+   }
+   mpi::reduceInplace(numParticles, mpi::SUM);
+   WALBERLA_LOG_INFO_ON_ROOT("#particles created: " << numParticles);
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - END ***");
+
+   // synchronize particles
+   syncCallWithoutTT();
+   syncCallWithoutTT();
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - START ***");
+
+   WcTimer      timerImbalanced;
+   WcTimer      timerLoadBalancing;
+   WcTimer      timerBalanced;
+   WcTimingPool tpImbalanced;
+   WcTimingPool tpBalanced;
+   WALBERLA_MPI_BARRIER();
+   timerImbalanced.start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      if( i % 200 == 0 )
+      {
+         WALBERLA_LOG_DEVEL_ON_ROOT( "Timestep " << i << " / " << params.simulationSteps );
+      }
+
+      tpImbalanced["CR"].start();
+      cr->timestep( real_c(params.dt) );
+      tpImbalanced["CR"].end();
+      tpImbalanced["Sync"].start();
+      syncCallWithoutTT();
+      tpImbalanced["Sync"].end();
+
+      //      if( i % params.visSpacing == 0 )
+      //      {
+      //         vtkDomainOutput->write( );
+      //         vtkSphereOutput->write( );
+      //      }
+   }
+   timerImbalanced.end();
+   // Optional rebalance, timed separately; VTK output is written before and after it.
+   if (bRebalance)
+   {
+      vtkDomainOutput->write( );
+      vtkSphereOutput->write( );
+      WALBERLA_MPI_BARRIER();
+      timerLoadBalancing.start();
+      WALBERLA_LOG_INFO_ON_ROOT("*** Rebalance ***");
+      createWithNeighborhoodLocalShadow( *forest, storageID, *ic );
+      clearSynchronization( *forest, storageID );
+      forest->refresh();
+      integerProperties["MigrationIterations1"] = int64_c(forest->phantomBlockMigrationIterations());
+      syncNextNeighbors<BodyTuple>(*forest, storageID);
+      for (auto blockIt = forest->begin(); blockIt != forest->end(); ++blockIt)
+      {
+         ccd::ICCD* ccd = blockIt->getData< ccd::ICCD >( ccdID );
+         ccd->reloadBodies();
+      }
+      timerLoadBalancing.end();
+      vtkDomainOutput->write( );
+      vtkSphereOutput->write( );
+   }
+
+   WALBERLA_MPI_BARRIER();
+   timerBalanced.start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      if( i % 200 == 0 )
+      {
+         WALBERLA_LOG_DEVEL_ON_ROOT( "Timestep " << i << " / " << params.simulationSteps );
+      }
+
+      tpBalanced["CR"].start();
+      cr->timestep( real_c(params.dt) );
+      tpBalanced["CR"].end();
+      tpBalanced["Sync"].start();
+      syncCallWithoutTT();
+      tpBalanced["Sync"].end();
+
+      //      if( i % params.visSpacing == 0 )
+      //      {
+      //         vtkDomainOutput->write( );
+      //         vtkSphereOutput->write( );
+      //      }
+   }
+   timerBalanced.end();
+   // PUpS = particles * steps / max() of the reduced timer; it is only evaluated inside the root section.
+   auto timerImbalancedReduced = walberla::timing::getReduced(timerImbalanced, REDUCE_TOTAL, 0);
+   double PUpSImbalanced = 0.0;
+   WALBERLA_ROOT_SECTION()
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("IMBALANCED " << *timerImbalancedReduced);
+      PUpSImbalanced = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timerImbalancedReduced->max());
+      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpSImbalanced);
+   }
+
+   auto timerBalancedReduced = walberla::timing::getReduced(timerBalanced, REDUCE_TOTAL, 0);
+   double PUpSBalanced = 0.0;
+   WALBERLA_ROOT_SECTION()
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("BALANCED " << *timerBalancedReduced);
+      PUpSBalanced = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timerBalancedReduced->max());
+      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpSBalanced);
+   }
+
+   auto timerLoadBalancingReduced = walberla::timing::getReduced(timerLoadBalancing, REDUCE_TOTAL, 0);
+
+   auto tpImbalancedReduced = tpImbalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpImbalancedReduced);
+
+   auto tpBalancedReduced = tpBalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpBalancedReduced);
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
+
+   auto temp = tt.getReduced( );
+   WALBERLA_ROOT_SECTION()
+   {
+      std::cout << temp;
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** CHECKING RESULT - START ***");
+   numParticles = 0;
+   int64_t numGhostParticles = 0;
+   for (auto& currentBlock : *forest)
+   {
+      Storage * storage = currentBlock.getData< Storage >( storageID );
+      BodyStorage& localStorage = (*storage)[0];
+      BodyStorage& shadowStorage = (*storage)[1];
+      numParticles += localStorage.size();
+      numGhostParticles += shadowStorage.size();
+   }
+   auto minParticles = mpi::reduce(numParticles, mpi::MIN);
+   auto maxParticles = mpi::reduce(numParticles, mpi::MAX);
+   WALBERLA_LOG_DEVEL_ON_ROOT("particle ratio: " << minParticles << " / " << maxParticles);
+
+   mpi::reduceInplace(numParticles, mpi::SUM);
+   mpi::reduceInplace(numGhostParticles, mpi::SUM);
+   WALBERLA_LOG_INFO_ON_ROOT("*** CHECKING RESULT - END ***");
+
+   uint_t runId = uint_c(-1);
+   WALBERLA_ROOT_SECTION()
+   {
+      stringProperties["walberla_git"]         = WALBERLA_GIT_SHA1;
+      stringProperties["tag"]                  = "pe";
+      integerProperties["bDEM"]                = bDEM;
+      integerProperties["bNN"]                 = bNN;
+      integerProperties["mpi_num_processes"]   = mpi::MPIManager::instance()->numProcesses();
+      integerProperties["omp_max_threads"]     = omp_get_max_threads();
+      realProperties["imbalanced_PUpS"]          = double_c(PUpSImbalanced);
+      realProperties["imbalanced_timer_min"]     = timerImbalancedReduced->min();
+      realProperties["imbalanced_timer_max"]     = timerImbalancedReduced->max();
+      realProperties["imbalanced_timer_average"] = timerImbalancedReduced->average();
+      realProperties["imbalanced_timer_total"]   = timerImbalancedReduced->total();
+      realProperties["loadbalancing_timer_min"]     = timerLoadBalancingReduced->min();
+      realProperties["loadbalancing_timer_max"]     = timerLoadBalancingReduced->max();
+      realProperties["loadbalancing_timer_average"] = timerLoadBalancingReduced->average();
+      realProperties["loadbalancing_timer_total"]   = timerLoadBalancingReduced->total();
+      realProperties["balanced_PUpS"]          = double_c(PUpSBalanced);
+      realProperties["balanced_timer_min"]     = timerBalancedReduced->min();
+      realProperties["balanced_timer_max"]     = timerBalancedReduced->max();
+      realProperties["balanced_timer_average"] = timerBalancedReduced->average();
+      realProperties["balanced_timer_total"]   = timerBalancedReduced->total();
+      integerProperties["num_particles"]       = numParticles;
+      integerProperties["num_ghost_particles"] = numGhostParticles;
+      integerProperties["minParticles"]        = minParticles;
+      integerProperties["maxParticles"]        = maxParticles;
+
+      mesa_pd::addBuildInfoToSQL( integerProperties, realProperties, stringProperties );
+      saveToSQL(params, integerProperties, realProperties, stringProperties );
+      mesa_pd::addDomainPropertiesToSQL(*forest, integerProperties, realProperties, stringProperties);
+      mesa_pd::addSlurmPropertiesToSQL(integerProperties, realProperties, stringProperties);
+      // Store each phase's reduced timing pool under its matching label.
+      runId = sqlite::storeRunInSqliteDB( params.sqlFile, integerProperties, stringProperties, realProperties );
+      sqlite::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpImbalancedReduced, "imbalanced" );
+      sqlite::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpBalancedReduced, "balanced" );
+   }
+   if (params.storeNodeTimings)
+   {
+      mesa_pd::storeNodeTimings(runId, params.sqlFile, "NodeTimingImbalanced", tpImbalanced);
+      mesa_pd::storeNodeTimings(runId, params.sqlFile, "NodeTimingBalanced", tpBalanced);
+   }
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
+
+   return EXIT_SUCCESS;
+}
+} // namespace walberla
+
+int main( int argc, char** argv ) // process entry point
+{
+   return ::walberla::main( argc, argv ); // forward the arguments and propagate the exit code
+}
diff --git a/apps/benchmarks/GranularGas/Parameters.cpp b/apps/benchmarks/GranularGas/Parameters.cpp
index 2080935eab627936e9888afd8e2ac948fcfaf403..9b9e904ca5744ee472ee91fed5ea4b8f3a94de2a 100644
--- a/apps/benchmarks/GranularGas/Parameters.cpp
+++ b/apps/benchmarks/GranularGas/Parameters.cpp
@@ -29,15 +29,22 @@
 #include <core/logging/Logging.h>
 
 namespace walberla {
+namespace mesa_pd {
 
 void loadFromConfig(Parameters& params, const Config::BlockHandle& cfg)
 {
    params.sorting = cfg.getParameter<std::string>("sorting", "none" );
    WALBERLA_LOG_INFO_ON_ROOT("sorting: " << params.sorting);
    
+   params.normal = cfg.getParameter<Vec3>("normal", Vec3(real_t(1.0), real_t(1.0), real_t(1.0)) );
+   WALBERLA_LOG_INFO_ON_ROOT("normal: " << params.normal);
+   
    params.spacing = cfg.getParameter<real_t>("spacing", real_t(1.0) );
    WALBERLA_LOG_INFO_ON_ROOT("spacing: " << params.spacing);
    
+   params.shift = cfg.getParameter<Vec3>("shift", Vec3(real_t(0.1), real_t(0.1), real_t(0.1)) );
+   WALBERLA_LOG_INFO_ON_ROOT("shift: " << params.shift);
+   
    params.radius = cfg.getParameter<real_t>("radius", real_t(0.5) );
    WALBERLA_LOG_INFO_ON_ROOT("radius: " << params.radius);
    
@@ -65,12 +72,60 @@ void loadFromConfig(Parameters& params, const Config::BlockHandle& cfg)
    params.visSpacing = cfg.getParameter<int64_t>("visSpacing", 1000 );
    WALBERLA_LOG_INFO_ON_ROOT("visSpacing: " << params.visSpacing);
    
-   params.path = cfg.getParameter<std::string>("path", "vtk_out" );
-   WALBERLA_LOG_INFO_ON_ROOT("path: " << params.path);
+   params.vtk_out = cfg.getParameter<std::string>("vtk_out", "vtk_out" );
+   WALBERLA_LOG_INFO_ON_ROOT("vtk_out: " << params.vtk_out);
    
    params.sqlFile = cfg.getParameter<std::string>("sqlFile", "benchmark.sqlite" );
    WALBERLA_LOG_INFO_ON_ROOT("sqlFile: " << params.sqlFile);
    
+   params.recalculateBlockLevelsInRefresh = cfg.getParameter<bool>("recalculateBlockLevelsInRefresh", false );
+   WALBERLA_LOG_INFO_ON_ROOT("recalculateBlockLevelsInRefresh: " << params.recalculateBlockLevelsInRefresh);
+   
+   params.alwaysRebalanceInRefresh = cfg.getParameter<bool>("alwaysRebalanceInRefresh", true );
+   WALBERLA_LOG_INFO_ON_ROOT("alwaysRebalanceInRefresh: " << params.alwaysRebalanceInRefresh);
+   
+   params.reevaluateMinTargetLevelsAfterForcedRefinement = cfg.getParameter<bool>("reevaluateMinTargetLevelsAfterForcedRefinement", false );
+   WALBERLA_LOG_INFO_ON_ROOT("reevaluateMinTargetLevelsAfterForcedRefinement: " << params.reevaluateMinTargetLevelsAfterForcedRefinement);
+   
+   params.allowRefreshChangingDepth = cfg.getParameter<bool>("allowRefreshChangingDepth", false );
+   WALBERLA_LOG_INFO_ON_ROOT("allowRefreshChangingDepth: " << params.allowRefreshChangingDepth);
+   
+   params.allowMultipleRefreshCycles = cfg.getParameter<bool>("allowMultipleRefreshCycles", false );
+   WALBERLA_LOG_INFO_ON_ROOT("allowMultipleRefreshCycles: " << params.allowMultipleRefreshCycles);
+   
+   params.checkForEarlyOutInRefresh = cfg.getParameter<bool>("checkForEarlyOutInRefresh", true );
+   WALBERLA_LOG_INFO_ON_ROOT("checkForEarlyOutInRefresh: " << params.checkForEarlyOutInRefresh);
+   
+   params.checkForLateOutInRefresh = cfg.getParameter<bool>("checkForLateOutInRefresh", true );
+   WALBERLA_LOG_INFO_ON_ROOT("checkForLateOutInRefresh: " << params.checkForLateOutInRefresh);
+   
+   params.regridMin = cfg.getParameter<uint_t>("regridMin", uint_c(100) );
+   WALBERLA_LOG_INFO_ON_ROOT("regridMin: " << params.regridMin);
+   
+   params.regridMax = cfg.getParameter<uint_t>("regridMax", uint_c(1000) );
+   WALBERLA_LOG_INFO_ON_ROOT("regridMax: " << params.regridMax);
+   
+   params.maxBlocksPerProcess = cfg.getParameter<int>("maxBlocksPerProcess", int_c(1000) );
+   WALBERLA_LOG_INFO_ON_ROOT("maxBlocksPerProcess: " << params.maxBlocksPerProcess);
+   
+   params.baseWeight = cfg.getParameter<real_t>("baseWeight", real_t(10.0) );
+   WALBERLA_LOG_INFO_ON_ROOT("baseWeight: " << params.baseWeight);
+   
+   params.metisipc2redist = cfg.getParameter<real_t>("metisipc2redist", real_t(1000.0) );
+   WALBERLA_LOG_INFO_ON_ROOT("metisipc2redist: " << params.metisipc2redist);
+   
+   params.LBAlgorithm = cfg.getParameter<std::string>("LBAlgorithm", "Hilbert" );
+   WALBERLA_LOG_INFO_ON_ROOT("LBAlgorithm: " << params.LBAlgorithm);
+   
+   params.metisAlgorithm = cfg.getParameter<std::string>("metisAlgorithm", "PART_GEOM_KWAY" );
+   WALBERLA_LOG_INFO_ON_ROOT("metisAlgorithm: " << params.metisAlgorithm);
+   
+   params.metisWeightsToUse = cfg.getParameter<std::string>("metisWeightsToUse", "BOTH_WEIGHTS" );
+   WALBERLA_LOG_INFO_ON_ROOT("metisWeightsToUse: " << params.metisWeightsToUse);
+   
+   params.metisEdgeSource = cfg.getParameter<std::string>("metisEdgeSource", "EDGES_FROM_EDGE_WEIGHTS" );
+   WALBERLA_LOG_INFO_ON_ROOT("metisEdgeSource: " << params.metisEdgeSource);
+   
 }
 
 void saveToSQL(const Parameters& params,
@@ -80,8 +135,10 @@ void saveToSQL(const Parameters& params,
 {
    stringProperties["sorting"] = params.sorting;
    
+   
    realProperties["spacing"] = double_c(params.spacing);
    
+   
    realProperties["radius"] = double_c(params.radius);
    
    
@@ -97,10 +154,33 @@ void saveToSQL(const Parameters& params,
    
    integerProperties["visSpacing"] = params.visSpacing;
    
-   stringProperties["path"] = params.path;
+   stringProperties["vtk_out"] = params.vtk_out;
    
    stringProperties["sqlFile"] = params.sqlFile;
    
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   realProperties["baseWeight"] = double_c(params.baseWeight);
+   
+   realProperties["metisipc2redist"] = double_c(params.metisipc2redist);
+   
+   stringProperties["LBAlgorithm"] = params.LBAlgorithm;
+   
+   stringProperties["metisAlgorithm"] = params.metisAlgorithm;
+   
+   stringProperties["metisWeightsToUse"] = params.metisWeightsToUse;
+   
+   stringProperties["metisEdgeSource"] = params.metisEdgeSource;
+   
 }
 
+} //namespace mesa_pd
 } //namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GranularGas/Parameters.h b/apps/benchmarks/GranularGas/Parameters.h
index caae193de89e1a063e13dd16e3f8534d71fa3e12..263b1ff5a8732d8babc110ceb7268b613260edae 100644
--- a/apps/benchmarks/GranularGas/Parameters.h
+++ b/apps/benchmarks/GranularGas/Parameters.h
@@ -27,16 +27,19 @@
 #pragma once
 
 #include <core/config/Config.h>
-#include <core/DataTypes.h>
+#include <mesa_pd/data/DataTypes.h>
 
 #include <string>
 
 namespace walberla {
+namespace mesa_pd {
 
 struct Parameters
 {
    std::string sorting = "none";
+   Vec3 normal = Vec3(real_t(1.0), real_t(1.0), real_t(1.0));
    real_t spacing = real_t(1.0);
+   Vec3 shift = Vec3(real_t(0.1), real_t(0.1), real_t(0.1));
    real_t radius = real_t(0.5);
    bool bBarrier = false;
    bool storeNodeTimings = false;
@@ -46,8 +49,24 @@ struct Parameters
    int64_t simulationSteps = 10;
    real_t dt = real_t(0.01);
    int64_t visSpacing = 1000;
-   std::string path = "vtk_out";
+   std::string vtk_out = "vtk_out";
    std::string sqlFile = "benchmark.sqlite";
+   bool recalculateBlockLevelsInRefresh = false;
+   bool alwaysRebalanceInRefresh = true;
+   bool reevaluateMinTargetLevelsAfterForcedRefinement = false;
+   bool allowRefreshChangingDepth = false;
+   bool allowMultipleRefreshCycles = false;
+   bool checkForEarlyOutInRefresh = true;
+   bool checkForLateOutInRefresh = true;
+   uint_t regridMin = uint_c(100);
+   uint_t regridMax = uint_c(1000);
+   int maxBlocksPerProcess = int_c(1000);
+   real_t baseWeight = real_t(10.0);
+   real_t metisipc2redist = real_t(1000.0);
+   std::string LBAlgorithm = "Hilbert";
+   std::string metisAlgorithm = "PART_GEOM_KWAY";
+   std::string metisWeightsToUse = "BOTH_WEIGHTS";
+   std::string metisEdgeSource = "EDGES_FROM_EDGE_WEIGHTS";
 };
 
 void loadFromConfig(Parameters& params,
@@ -58,4 +77,5 @@ void saveToSQL(const Parameters& params,
                std::map< std::string, double >&            realProperties,
                std::map< std::string, std::string >&       stringProperties );
 
+} //namespace mesa_pd
 } //namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GranularGas/Parameters.templ.cpp b/apps/benchmarks/GranularGas/Parameters.templ.cpp
index 8f9be98116d57a52f6d9b9ea200e6ee0882d36f6..c8aae96b39b6587ca89328685ae2d81250ac0095 100644
--- a/apps/benchmarks/GranularGas/Parameters.templ.cpp
+++ b/apps/benchmarks/GranularGas/Parameters.templ.cpp
@@ -29,6 +29,7 @@
 #include <core/logging/Logging.h>
 
 namespace walberla {
+namespace mesa_pd {
 
 void loadFromConfig(Parameters& params, const Config::BlockHandle& cfg)
 {
@@ -56,4 +57,5 @@ void saveToSQL(const Parameters& params,
    {% endfor %}
 }
 
+} //namespace mesa_pd
 } //namespace walberla
diff --git a/apps/benchmarks/GranularGas/Parameters.templ.h b/apps/benchmarks/GranularGas/Parameters.templ.h
index 29b18935ca1b3ffe708826b141c2cc170ab4c321..6f02004127219d02cf5a9105e23a9986b7711494 100644
--- a/apps/benchmarks/GranularGas/Parameters.templ.h
+++ b/apps/benchmarks/GranularGas/Parameters.templ.h
@@ -27,11 +27,12 @@
 #pragma once
 
 #include <core/config/Config.h>
-#include <core/DataTypes.h>
+#include <mesa_pd/data/DataTypes.h>
 
 #include <string>
 
 namespace walberla {
+namespace mesa_pd {
 
 struct Parameters
 {
@@ -48,4 +49,5 @@ void saveToSQL(const Parameters& params,
                std::map< std::string, double >&            realProperties,
                std::map< std::string, std::string >&       stringProperties );
 
+} //namespace mesa_pd
 } //namespace walberla
diff --git a/apps/benchmarks/GranularGas/SQLProperties.cpp b/apps/benchmarks/GranularGas/SQLProperties.cpp
index 99e82194d9ac188ae693c8998ea77ef99a5b5396..2147f259e5a7d47e382f7ee4127a458a110096b4 100644
--- a/apps/benchmarks/GranularGas/SQLProperties.cpp
+++ b/apps/benchmarks/GranularGas/SQLProperties.cpp
@@ -108,6 +108,7 @@ void addSlurmPropertiesToSQL( std::map< std::string, int64_t > &        /*intege
    stringProperties["SLURM_NTASKS_PER_CORE"]    = envToString(std::getenv( "SLURM_NTASKS_PER_CORE" ));
    stringProperties["SLURM_NTASKS_PER_NODE"]    = envToString(std::getenv( "SLURM_NTASKS_PER_NODE" ));
    stringProperties["SLURM_NTASKS_PER_SOCKET"]  = envToString(std::getenv( "SLURM_NTASKS_PER_SOCKET" ));
+   stringProperties["SLURM_CPU_BIND_TYPE"]      = envToString(std::getenv( "SLURM_CPU_BIND_TYPE" ));
 }
 
 } //namespace mesa_pd
diff --git a/apps/benchmarks/GranularGas/generateConfig.py b/apps/benchmarks/GranularGas/generateConfig.py
index fe13e1768f08f015d151c6e6d41068dc65d8435e..195534574c85a9fee6efe988b8dab8b0f77a7fb1 100755
--- a/apps/benchmarks/GranularGas/generateConfig.py
+++ b/apps/benchmarks/GranularGas/generateConfig.py
@@ -5,7 +5,9 @@ from ConfigGenerator import Config
 
 cfg = Config()
 cfg.addParameter("sorting",                "std::string", '"none"')
+cfg.addParameter("normal",                 "Vec3",        "Vec3(real_t(1.0), real_t(1.0), real_t(1.0))")
 cfg.addParameter("spacing",                "real_t",      "real_t(1.0)")
+cfg.addParameter("shift",                  "Vec3",        "Vec3(real_t(0.1), real_t(0.1), real_t(0.1))")
 cfg.addParameter("radius",                 "real_t",      "real_t(0.5)")
 cfg.addParameter("bBarrier",               "bool",        "false")
 cfg.addParameter("storeNodeTimings",       "bool",        "false")
@@ -15,7 +17,26 @@ cfg.addParameter("initialRefinementLevel", "int64_t",     "0")
 cfg.addParameter("simulationSteps",        "int64_t",     "10")
 cfg.addParameter("dt",                     "real_t",      "real_t(0.01)")
 cfg.addParameter("visSpacing",             "int64_t",     "1000")
-cfg.addParameter("path",                   "std::string", '"vtk_out"')
+cfg.addParameter("vtk_out",                "std::string", '"vtk_out"')
 cfg.addParameter("sqlFile",                "std::string", '"benchmark.sqlite"')
 
+cfg.addParameter("recalculateBlockLevelsInRefresh",                "bool", "false");
+cfg.addParameter("alwaysRebalanceInRefresh",                       "bool", "true");
+cfg.addParameter("reevaluateMinTargetLevelsAfterForcedRefinement", "bool", "false");
+cfg.addParameter("allowRefreshChangingDepth",                      "bool", "false");
+
+cfg.addParameter("allowMultipleRefreshCycles",                     "bool", "false");
+cfg.addParameter("checkForEarlyOutInRefresh",                      "bool", "true");
+cfg.addParameter("checkForLateOutInRefresh",                       "bool", "true");
+
+cfg.addParameter("regridMin",              "uint_t",      'uint_c(100)')
+cfg.addParameter("regridMax",              "uint_t",      'uint_c(1000)')
+cfg.addParameter("maxBlocksPerProcess",    "int",         'int_c(1000)')
+cfg.addParameter("baseWeight",             "real_t",      'real_t(10.0)')
+cfg.addParameter("metisipc2redist",        "real_t",      'real_t(1000.0)')
+cfg.addParameter("LBAlgorithm",            "std::string", '"Hilbert"')
+cfg.addParameter("metisAlgorithm",         "std::string", '"PART_GEOM_KWAY"' );
+cfg.addParameter("metisWeightsToUse",      "std::string", '"BOTH_WEIGHTS"' );
+cfg.addParameter("metisEdgeSource",        "std::string", '"EDGES_FROM_EDGE_WEIGHTS"' );
+
 cfg.generate()
diff --git a/python/mesa_pd.py b/python/mesa_pd.py
index 7e807749289873c6d30073805d73b8c32f86f6d5..0ba17be6b76d5f60c39471b924b2fe6c95411471 100755
--- a/python/mesa_pd.py
+++ b/python/mesa_pd.py
@@ -32,6 +32,7 @@ if __name__ == '__main__':
    ps    = data.ParticleStorage()
    ch    = data.ContactHistory()
    lc    = data.LinkedCells()
+   slc   = data.SparseLinkedCells()
    ss    = data.ShapeStorage(ps, shapes)
    cs    = data.ContactStorage()
 
@@ -47,6 +48,9 @@ if __name__ == '__main__':
    ps.addProperty("torque",           "walberla::mesa_pd::Vec3", defValue="real_t(0)", syncMode="NEVER")
    ps.addProperty("oldTorque",        "walberla::mesa_pd::Vec3", defValue="real_t(0)", syncMode="MIGRATION")
 
+   ps.addInclude("blockforest/BlockForest.h")
+   ps.addProperty("currentBlock",     "blockforest::Block*",     defValue="nullptr",   syncMode="NEVER")
+
    ps.addProperty("type",             "uint_t",                  defValue="0",         syncMode="COPY")
 
    ps.addProperty("flags",            "walberla::mesa_pd::data::particle_flags::FlagT", defValue="", syncMode="COPY")
@@ -96,6 +100,7 @@ if __name__ == '__main__':
    kernels.append( kernel.InitContactsForHCSITS() )
    kernels.append( kernel.IntegrateParticlesHCSITS() )
    kernels.append( kernel.InsertParticleIntoLinkedCells() )
+   kernels.append( kernel.InsertParticleIntoSparseLinkedCells() )
    kernels.append( kernel.LinearSpringDashpot() )
    kernels.append( kernel.NonLinearSpringDashpot() )
    kernels.append( kernel.SingleCast(shapes) )
@@ -122,6 +127,7 @@ if __name__ == '__main__':
    ps.generate(args.path + "/src/mesa_pd/")
    ch.generate(args.path + "/src/mesa_pd/")
    lc.generate(args.path + "/src/mesa_pd/")
+   slc.generate(args.path + "/src/mesa_pd/")
    ss.generate(args.path + "/src/mesa_pd/")
    cs.generate(args.path + "/src/mesa_pd/")
 
diff --git a/python/mesa_pd/data/SparseLinkedCells.py b/python/mesa_pd/data/SparseLinkedCells.py
new file mode 100644
index 0000000000000000000000000000000000000000..516acfd82d4ae94474604f238fdd7fcd13ab4052
--- /dev/null
+++ b/python/mesa_pd/data/SparseLinkedCells.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+from ..utility import generateFile
+
+class SparseLinkedCells:  # code-generation entry for the SparseLinkedCells header template; defines no attributes
+   def generate(self, path):  # path: forwarded unchanged as the first argument of generateFile
+      generateFile(path, 'data/SparseLinkedCells.templ.h')  # called without a context argument
diff --git a/python/mesa_pd/data/__init__.py b/python/mesa_pd/data/__init__.py
index a8b06695767be4d34ea93c3c1d9797b8a8798d24..d09dc6e57c8eeae41b4462b161eca662667f9fea 100644
--- a/python/mesa_pd/data/__init__.py
+++ b/python/mesa_pd/data/__init__.py
@@ -5,9 +5,11 @@ from .ContactStorage import ContactStorage
 from .LinkedCells import LinkedCells
 from .ParticleStorage import ParticleStorage
 from .ShapeStorage import ShapeStorage
+from .SparseLinkedCells import SparseLinkedCells
 
 __all__ = ['ContactHistory',
            'ContactStorage',
            'GeometryStorage',
            'LinkedCells',
-           'ParticleStorage']
+           'ParticleStorage',
+           'SparseLinkedCells']
diff --git a/python/mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.py b/python/mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bb841b01e00b688025030b4b84600f1bff2ffcd
--- /dev/null
+++ b/python/mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+
+from mesa_pd.accessor import Accessor
+from mesa_pd.utility import generateFile
+
+class InsertParticleIntoSparseLinkedCells:  # generator for the InsertParticleIntoSparseLinkedCells kernel header
+   def __init__(self):  # registers the three particle properties this kernel requires
+      self.accessor = Accessor()
+      self.accessor.require("position",     "walberla::mesa_pd::Vec3",                        access="g")
+      self.accessor.require("flags",        "walberla::mesa_pd::data::particle_flags::FlagT", access="g")
+      self.accessor.require("nextParticle", "size_t",                                     access="gs" )  # NOTE(review): presumably "g" = getter, "gs" = getter + setter -- confirm in Accessor.require
+
+   def getRequirements(self):  # returns the Accessor object populated in __init__
+      return self.accessor
+
+   def generate(self, path):  # path: forwarded unchanged as the first argument of generateFile
+      context = dict()
+      context["interface"]        = self.accessor.properties  # the template receives the required properties under the key "interface"
+      generateFile(path, 'kernel/InsertParticleIntoSparseLinkedCells.templ.h', context)
diff --git a/python/mesa_pd/kernel/__init__.py b/python/mesa_pd/kernel/__init__.py
index 28d7b5a1bc72c401ce59cabc3338fda64b1d3a91..eef9907a547ca08e93d35a26eed6d89ef005580b 100644
--- a/python/mesa_pd/kernel/__init__.py
+++ b/python/mesa_pd/kernel/__init__.py
@@ -10,6 +10,7 @@ from .InitParticlesForHCSITS import InitParticlesForHCSITS
 from .InitContactsForHCSITS import InitContactsForHCSITS
 from .IntegrateParticlesHCSITS import IntegrateParticlesHCSITS
 from .InsertParticleIntoLinkedCells import InsertParticleIntoLinkedCells
+from .InsertParticleIntoSparseLinkedCells import InsertParticleIntoSparseLinkedCells
 from .LinearSpringDashpot import LinearSpringDashpot
 from .NonLinearSpringDashpot import NonLinearSpringDashpot
 from .SingleCast import SingleCast
@@ -29,6 +30,7 @@ __all__ = ['DoubleCast',
            'InitContactsForHCSITS',
            'IntegrateParticlesHCSITS',
            'InsertParticleIntoLinkedCells',
+           'InsertParticleIntoSparseLinkedCells',
            'LinearSpringDashpot',
            'NonLinearSpringDashpot',
            'SingleCast',
diff --git a/python/mesa_pd/templates/data/LinkedCells.templ.h b/python/mesa_pd/templates/data/LinkedCells.templ.h
index c5107a6ef8659f1787a3d5fa6991908f43148fea..3cd762e5e6ea49ca7c4d33e99243a5dcd63c17b3 100644
--- a/python/mesa_pd/templates/data/LinkedCells.templ.h
+++ b/python/mesa_pd/templates/data/LinkedCells.templ.h
@@ -93,7 +93,10 @@ struct LinkedCells
 };
 
 inline
-math::AABB getCellAABB(const LinkedCells& ll, const int hash0, const int hash1, const int hash2)
+math::AABB getCellAABB(const LinkedCells& ll,
+                       const int64_t hash0,
+                       const int64_t hash1,
+                       const int64_t hash2)
 {
    {%- for dim in range(3) %}
    WALBERLA_ASSERT_GREATER_EQUAL(hash{{dim}}, 0);
@@ -110,13 +113,35 @@ math::AABB getCellAABB(const LinkedCells& ll, const int hash0, const int hash1,
 }
 
 inline
-int getCellIdx(const LinkedCells& ll, const int hash0, const int hash1, const int hash2)
+uint_t getCellIdx(const LinkedCells& ll, // maps per-dimension cell coordinates (hash0, hash1, hash2) to one linear cell index
+                  const int64_t hash0,
+                  const int64_t hash1,
+                  const int64_t hash2)
 {
    {%- for dim in range(3) %}
    WALBERLA_ASSERT_GREATER_EQUAL(hash{{dim}}, 0);
    WALBERLA_ASSERT_LESS(hash{{dim}}, ll.numCellsPerDim_[{{dim}}]);
    {%- endfor %}
-   return hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] + hash1 * ll.numCellsPerDim_[0] + hash0;
+   return uint_c(hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] + hash1 * ll.numCellsPerDim_[0] + hash0); // hash0 has stride 1, hash1 stride n0, hash2 stride n0*n1
+}
+
+inline
+void getCellCoordinates(const LinkedCells& ll, // splits a linear cell index into per-dimension coordinates (inverse of the getCellIdx formula)
+                        const uint64_t idx, // linear cell index to decompose
+                        int64_t& hash0, // out: coordinate along dimension 0
+                        int64_t& hash1, // out: coordinate along dimension 1
+                        int64_t& hash2) // out: coordinate along dimension 2
+{
+   hash2 = int64_c(idx) / (ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0]); // idx divided by the size of one dim0*dim1 layer
+   hash1 = (int64_c(idx) - (hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0])) / (ll.numCellsPerDim_[0]); // remainder after removing the hash2 layers, divided by the dim0 extent
+   hash0 = int64_c(idx) - hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] - hash1 * ll.numCellsPerDim_[0]; // what is left after removing the hash2 and hash1 contributions
+
+   WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0); // the asserts below require 0 <= hashN < numCellsPerDim_[N] for every dimension
+   WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash1, 0);
+   WALBERLA_ASSERT_LESS(hash1, ll.numCellsPerDim_[1]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash2, 0);
+   WALBERLA_ASSERT_LESS(hash2, ll.numCellsPerDim_[2]);
 }
 
 inline
@@ -143,15 +168,17 @@ LinkedCells::LinkedCells(const math::AABB& domain, const Vec3& cellDiameter)
 
    WALBERLA_CHECK_GREATER_EQUAL(numCellsPerDim_[{{dim}}], 0);
    {%- endfor %}
+
+   std::fill(cells_.begin(), cells_.end(), -1);
 }
 
 void LinkedCells::clear()
 {
    const uint64_t cellsSize = cells_.size();
    //clear existing linked cells
-   #ifdef _OPENMP
-   #pragma omp parallel for schedule(static)
-   #endif
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static)
+#endif
    for (int64_t i = 0; i < int64_c(cellsSize); ++i)
       cells_[uint64_c(i)] = -1;
    infiniteParticles_ = -1;
@@ -172,8 +199,8 @@ inline void LinkedCells::forEachParticlePair{%- if half %}Half{%- endif %}(const
       {
          for (int x = 0; x < numCellsPerDim_[0]; ++x)
          {
-            const int cell_idx = getCellIdx(*this, x, y, z); ///< current cell index
-            int p_idx = cells_[uint_c(cell_idx)]; ///< current particle index
+            const uint_t cell_idx = getCellIdx(*this, x, y, z); ///< current cell index
+            int p_idx = cells_[cell_idx]; ///< current particle index
             int np_idx = -1; ///< particle to be checked against
 
             while (p_idx != -1)
@@ -212,11 +239,11 @@ inline void LinkedCells::forEachParticlePair{%- if half %}Half{%- endif %}(const
                   if (ny >= numCellsPerDim_[1]) continue;
                   if (nz >= numCellsPerDim_[2]) continue;
 
-                  const int ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
+                  const uint_t ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
 
                   WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
                   WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
-                  np_idx = cells_[uint_c(ncell_idx)]; ///< neighbor particle index
+                  np_idx = cells_[ncell_idx]; ///< neighbor particle index
                   while (np_idx != -1)
                   {
                      WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
diff --git a/python/mesa_pd/templates/data/ParticleAccessor.templ.h b/python/mesa_pd/templates/data/ParticleAccessor.templ.h
index 387824fbee9f3a4726eb1dc048cbd5d4c4079b90..7372641f421672b9da01482024ba029d8f75b3ba 100644
--- a/python/mesa_pd/templates/data/ParticleAccessor.templ.h
+++ b/python/mesa_pd/templates/data/ParticleAccessor.templ.h
@@ -50,9 +50,9 @@ public:
    virtual ~ParticleAccessor() = default;
 
    {%- for prop in properties %}
-   const {{prop.type}}& get{{prop.name | capFirst}}(const size_t p_idx) const {return ps_->get{{prop.name | capFirst}}(p_idx);}
+   {{prop.type}} const & get{{prop.name | capFirst}}(const size_t p_idx) const {return ps_->get{{prop.name | capFirst}}(p_idx);}
    {{prop.type}}& get{{prop.name | capFirst}}Ref(const size_t p_idx) {return ps_->get{{prop.name | capFirst}}Ref(p_idx);}
-   void set{{prop.name | capFirst}}(const size_t p_idx, const {{prop.type}}& v) { ps_->set{{prop.name | capFirst}}(p_idx, v);}
+   void set{{prop.name | capFirst}}(const size_t p_idx, {{prop.type}} const & v) { ps_->set{{prop.name | capFirst}}(p_idx, v);}
    {% endfor %}
 
    id_t getInvalidUid() const {return UniqueID<data::Particle>::invalidID();}
@@ -101,8 +101,8 @@ public:
    virtual ~SingleParticleAccessor() = default;
 
    {%- for prop in properties %}
-   const {{prop.type}}& get{{prop.name | capFirst}}(const size_t /*p_idx*/) const {return {{prop.name}}_;}
-   void set{{prop.name | capFirst}}(const size_t /*p_idx*/, const {{prop.type}}& v) { {{prop.name}}_ = v;}
+   {{prop.type}} const & get{{prop.name | capFirst}}(const size_t /*p_idx*/) const {return {{prop.name}}_;}
+   void set{{prop.name | capFirst}}(const size_t /*p_idx*/, {{prop.type}} const & v) { {{prop.name}}_ = v;}
    {{prop.type}}& get{{prop.name | capFirst}}Ref(const size_t /*p_idx*/) {return {{prop.name}}_;}
    {% endfor %}
 
diff --git a/python/mesa_pd/templates/data/ParticleStorage.templ.h b/python/mesa_pd/templates/data/ParticleStorage.templ.h
index 21ecee6d700d1af75a396720039bef9c385a03b4..72b309e241404bf9b7bc5e14c195ddf527122df2 100644
--- a/python/mesa_pd/templates/data/ParticleStorage.templ.h
+++ b/python/mesa_pd/templates/data/ParticleStorage.templ.h
@@ -76,9 +76,9 @@ public:
       {%- endfor %}
 
       {% for prop in properties %}
-      const {{prop.name}}_type& get{{prop.name | capFirst}}() const {return storage_.get{{prop.name | capFirst}}(i_);}
+      {{prop.name}}_type const & get{{prop.name | capFirst}}() const {return storage_.get{{prop.name | capFirst}}(i_);}
       {{prop.name}}_type& get{{prop.name | capFirst}}Ref() {return storage_.get{{prop.name | capFirst}}Ref(i_);}
-      void set{{prop.name | capFirst}}(const {{prop.name}}_type& v) { storage_.set{{prop.name | capFirst}}(i_, v);}
+      void set{{prop.name | capFirst}}({{prop.name}}_type const & v) { storage_.set{{prop.name | capFirst}}(i_, v);}
       {% endfor %}
 
       size_t getIdx() const {return i_;}
@@ -144,9 +144,9 @@ public:
    {%- endfor %}
 
    {% for prop in properties %}
-   const {{prop.name}}_type& get{{prop.name | capFirst}}(const size_t idx) const {return {{prop.name}}_[idx];}
+   {{prop.name}}_type const & get{{prop.name | capFirst}}(const size_t idx) const {return {{prop.name}}_[idx];}
    {{prop.name}}_type& get{{prop.name | capFirst}}Ref(const size_t idx) {return {{prop.name}}_[idx];}
-   void set{{prop.name | capFirst}}(const size_t idx, const {{prop.name}}_type& v) { {{prop.name}}_[idx] = v; }
+   void set{{prop.name | capFirst}}(const size_t idx, {{prop.name}}_type const & v) { {{prop.name}}_[idx] = v; }
    {% endfor %}
 
    /**
@@ -567,7 +567,7 @@ public:
    using return_type = {{prop.type}};
    {{prop.type}}& operator()(data::Particle& p) const {return p.get{{prop.name | capFirst}}Ref();}
    {{prop.type}}& operator()(data::Particle&& p) const {return p.get{{prop.name | capFirst}}Ref();}
-   const {{prop.type}}& operator()(const data::Particle& p) const {return p.get{{prop.name | capFirst}}();}
+   {{prop.type}} const & operator()(const data::Particle& p) const {return p.get{{prop.name | capFirst}}();}
 };
 {%- endfor %}
 
diff --git a/python/mesa_pd/templates/data/SparseLinkedCells.templ.h b/python/mesa_pd/templates/data/SparseLinkedCells.templ.h
new file mode 100644
index 0000000000000000000000000000000000000000..139dbb0a4b0e989ba5f2e513b0e0a4637255acdc
--- /dev/null
+++ b/python/mesa_pd/templates/data/SparseLinkedCells.templ.h
@@ -0,0 +1,302 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SparseLinkedCells.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/ParticleStorage.h>
+
+#include <core/Abort.h>
+#include <core/debug/Debug.h>
+#include <core/math/AABB.h>
+#include <stencil/D3Q27.h>
+
+#include <atomic>
+#include <cmath>
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace data {
+
+struct SparseLinkedCells
+{
+   SparseLinkedCells(const math::AABB& domain, const real_t cellDiameter) // same upper bound for the cell edge length in all three dimensions
+      : SparseLinkedCells(domain, Vec3(cellDiameter,cellDiameter,cellDiameter))
+   {}
+   SparseLinkedCells(const math::AABB& domain, const Vec3& cellDiameter);
+
+   void clear(); ///< resets all occupied cells and the infinite-particle list to empty (-1)
+
+   /**
+    * Calls the provided functor \p func for all particle pairs accepted by \p selector, once as (i, j) and once as (j, i).
+    *
+    * Additional arguments can be provided. No pairs with twice the same particle are generated.
+    * Call syntax for the provided functor
+    * \code
+    * func( i, j, std::forward<Args>(args)... );
+    * \endcode
+    * \param openmp currently unused by the implementation, the pairs are processed serially
+    */
+   template <typename Selector, typename Accessor, typename Func, typename... Args>
+   void forEachParticlePair(const bool openmp,
+                            const Selector& selector,
+                            Accessor& acForLC,
+                            Func&& func,
+                            Args&&... args) const;
+   /**
+    * Calls the provided functor \p func for all particle pairs accepted by \p selector.
+    *
+    * Additional arguments can be provided. No pairs with twice the same particle are generated.
+    * No pair is called twice!
+    * Call syntax for the provided functor
+    * \code
+    * func( i, j, std::forward<Args>(args)... );
+    * \endcode
+    * \param openmp currently unused by the implementation, the pairs are processed serially
+    */
+   template <typename Selector, typename Accessor, typename Func, typename... Args>
+   void forEachParticlePairHalf(const bool openmp,
+                                const Selector& selector,
+                                Accessor& acForLC,
+                                Func&& func,
+                                Args&&... args) const;
+
+   math::AABB   domain_ {}; ///< local domain covered by this data structure
+   Vector3<int> numCellsPerDim_ {}; ///< number of linked cells per dimension
+   Vec3         cellDiameter_ {}; ///< edge lengths of one cell (domain size divided by the number of cells)
+   Vec3         invCellDiameter_ {}; ///< component-wise reciprocal of cellDiameter_
+   std::atomic<int> infiniteParticles_ {}; ///< head of the list of particles too large for the cells, -1 marks an empty list
+   std::vector< std::atomic<int> > cells_ {}; ///< actual cell data structure: head particle index per cell, -1 marks an empty cell
+   std::vector<size_t> nonEmptyCells_ {}; ///< list of cells containing particles
+};
+
+/// Returns the axis-aligned bounding box of the cell with the coordinates
+/// (hash0, hash1, hash2). The coordinates have to lie inside the grid of \p ll,
+/// which is verified by assertions only.
+inline math::AABB getCellAABB(const SparseLinkedCells& ll, const int64_t hash0, const int64_t hash1, const int64_t hash2)
+{
+   {%- for dim in range(3) %}
+   WALBERLA_ASSERT_GREATER_EQUAL(hash{{dim}}, 0);
+   WALBERLA_ASSERT_LESS(hash{{dim}}, ll.numCellsPerDim_[{{dim}}]);
+   {%- endfor %}
+   const auto& origin = ll.domain_.minCorner();
+   const auto& extent = ll.cellDiameter_;
+   // lower corner: origin shifted by `hash` whole cells, upper corner: one cell further
+   return math::AABB(extent[0] * real_c(hash0) + origin[0],
+                     extent[1] * real_c(hash1) + origin[1],
+                     extent[2] * real_c(hash2) + origin[2],
+                     extent[0] * real_c(hash0 + 1) + origin[0],
+                     extent[1] * real_c(hash1 + 1) + origin[1],
+                     extent[2] * real_c(hash2 + 1) + origin[2]);
+}
+
+/// Maps the cell coordinates (hash0, hash1, hash2) to the linear cell index;
+/// hash0 is the fastest running coordinate, hash2 the slowest.
+inline uint_t getCellIdx(const SparseLinkedCells& ll, const int64_t hash0, const int64_t hash1, const int64_t hash2)
+{
+   {%- for dim in range(3) %}
+   WALBERLA_ASSERT_GREATER_EQUAL(hash{{dim}}, 0);
+   WALBERLA_ASSERT_LESS(hash{{dim}}, ll.numCellsPerDim_[{{dim}}]);
+   {%- endfor %}
+   const int64_t rowOffset   = hash1 * ll.numCellsPerDim_[0];
+   const int64_t planeOffset = hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0];
+   return uint_c(planeOffset + rowOffset + hash0);
+}
+
+/// Splits the linear cell index \p idx into the cell coordinates
+/// (hash0, hash1, hash2), which are returned through the reference parameters.
+/// The resulting coordinates are verified against the grid size by assertions only.
+inline void getCellCoordinates(const SparseLinkedCells& ll, const uint64_t idx, int64_t& hash0, int64_t& hash1, int64_t& hash2)
+{
+   const auto cellsPerRow   = ll.numCellsPerDim_[0];
+   const auto cellsPerPlane = ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0];
+
+   hash2 = int64_c(idx) / cellsPerPlane;
+   const int64_t inPlane = int64_c(idx) - hash2 * cellsPerPlane; // remainder within the plane selected by hash2
+   hash1 = inPlane / cellsPerRow;
+   hash0 = inPlane - hash1 * cellsPerRow;
+
+   {%- for dim in range(3) %}
+   WALBERLA_ASSERT_GREATER_EQUAL(hash{{dim}}, 0);
+   WALBERLA_ASSERT_LESS(hash{{dim}}, ll.numCellsPerDim_[{{dim}}]);
+   {%- endfor %}
+}
+
+inline SparseLinkedCells::SparseLinkedCells(const math::AABB& domain, const Vec3& cellDiameter) // builds an empty cell grid covering domain
+   : domain_(domain)
+   , numCellsPerDim_( static_cast<int>(std::ceil( domain.sizes()[0] / cellDiameter[0])), // round up so that the cells cover the whole domain
+     static_cast<int>(std::ceil( domain.sizes()[1] / cellDiameter[1])),
+     static_cast<int>(std::ceil( domain.sizes()[2] / cellDiameter[2])) )
+   , cellDiameter_( domain.sizes()[0] / real_c(numCellsPerDim_[0]), // actual cell size: the domain is divided evenly among the cells
+     domain.sizes()[1] / real_c(numCellsPerDim_[1]),
+     domain.sizes()[2] / real_c(numCellsPerDim_[2]) )
+   , invCellDiameter_( real_t(1) / cellDiameter_[0], real_t(1) / cellDiameter_[1], real_t(1) / cellDiameter_[2] )
+   , cells_(uint_c(numCellsPerDim_[0]*numCellsPerDim_[1]*numCellsPerDim_[2]))
+   , nonEmptyCells_(uint_c(numCellsPerDim_[0]*numCellsPerDim_[1]*numCellsPerDim_[2]))
+{
+   //precondition (only checked here, i.e. after the initializer list has already divided by cellDiameter)
+   {%- for dim in range(3) %}
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter[{{dim}}], real_t(0));
+   {%- endfor %}
+
+   //postcondition: the actual cell size never exceeds the requested one
+   {%- for dim in range(3) %}
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter_[{{dim}}], real_t(0));
+   WALBERLA_CHECK_LESS_EQUAL(cellDiameter_[{{dim}}], cellDiameter[{{dim}}]);
+
+   WALBERLA_CHECK_GREATER_EQUAL(numCellsPerDim_[{{dim}}], 0);
+   {%- endfor %}
+
+   nonEmptyCells_.clear(); // drops the placeholder entries created in the initializer list
+   infiniteParticles_ = -1; // the member initializer {} yields 0, which would be read as "particle 0 heads the list"
+   std::fill(cells_.begin(), cells_.end(), -1); // -1 marks an empty cell
+}
+
+inline void SparseLinkedCells::clear() // inline: defined in a header, a non-inline definition is emitted by every including translation unit
+{
+   for (const auto v : nonEmptyCells_) // reset only the cells that were recorded as occupied
+   {
+      WALBERLA_ASSERT_LESS(v, cells_.size());
+      cells_[v] = -1; // -1 marks an empty cell
+   }
+   nonEmptyCells_.clear();
+   infiniteParticles_ = -1; // -1 marks an empty list of infinite particles
+}
+
+{%- for half in [False, True] %}
+template <typename Selector, typename Accessor, typename Func, typename... Args>
+inline void SparseLinkedCells::forEachParticlePair{%- if half %}Half{%- endif %}(const bool openmp, const Selector& selector, Accessor& acForLC, Func&& func, Args&&... args) const
+{
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+   WALBERLA_UNUSED(openmp); // the flag is ignored, there is no parallel code path below
+
+   for (const auto cellIdx : nonEmptyCells_) // only cells recorded as occupied are visited
+   {
+      int64_t x = 0;
+      int64_t y = 0;
+      int64_t z = 0;
+      getCellCoordinates(*this, cellIdx, x, y, z);
+      int p_idx = cells_[cellIdx]; ///< current particle index (head of the cell's particle list)
+      int np_idx = -1; ///< particle to be checked against
+
+      while (p_idx != -1) // -1 terminates a particle list
+      {
+         WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
+         WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
+
+         // check particles in own cell: only the successors of p_idx in the list are paired with it
+         np_idx = acForLC.getNextParticle(uint_c(p_idx)); ///< neighbor particle index
+         while (np_idx != -1)
+         {
+            WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+            WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+            if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+            {
+               func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+               {%- if not half %}
+               func(uint_c(np_idx), uint_c(p_idx), std::forward<Args>(args)...);
+               {%- endif %}
+            }
+
+            // go to next particle
+            np_idx = acForLC.getNextParticle(uint_c(np_idx));
+         }
+
+         // check particles in infiniteParticles list: p_idx is paired with every entry of that list
+         np_idx = infiniteParticles_; ///< neighbor particle index
+         while (np_idx != -1)
+         {
+            WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+            WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+            if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+            {
+               func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+               {%- if not half %}
+               func(uint_c(np_idx), uint_c(p_idx), std::forward<Args>(args)...);
+               {%- endif %}
+            }
+
+            // go to next particle
+            np_idx = acForLC.getNextParticle(uint_c(np_idx));
+         }
+
+         // go to next particle
+         p_idx = acForLC.getNextParticle(uint_c(p_idx));
+      }
+
+      // check particles in neighboring cells (only positive ones)
+      for (auto dir : stencil::D3Q27::dir_pos)
+      {
+         const int64_t nx = x + int64_c(stencil::cx[dir]);
+         const int64_t ny = y + int64_c(stencil::cy[dir]);
+         const int64_t nz = z + int64_c(stencil::cz[dir]);
+         if (nx < 0) continue; // skip neighbor cells that lie outside of the grid
+         if (ny < 0) continue;
+         if (nz < 0) continue;
+         if (nx >= numCellsPerDim_[0]) continue;
+         if (ny >= numCellsPerDim_[1]) continue;
+         if (nz >= numCellsPerDim_[2]) continue;
+
+         const uint64_t ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
+
+         p_idx = cells_[cellIdx]; ///< current particle index, restarting at the head of the own cell's list
+         WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0); // a cell listed in nonEmptyCells_ is expected to hold a particle
+         WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
+         while (p_idx != -1)
+         {
+            np_idx = cells_[ncell_idx]; ///< neighbor particle index (-1 if the neighbor cell is empty)
+            while (np_idx != -1)
+            {
+               WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+               WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+               if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+               {
+                  func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+                  {%- if not half %}
+                  func(uint_c(np_idx), uint_c(p_idx), std::forward<Args>(args)...);
+                  {%- endif %}
+               }
+
+               // go to next particle
+               np_idx = acForLC.getNextParticle(uint_c(np_idx));
+            }
+
+            // go to next particle
+            p_idx = acForLC.getNextParticle(uint_c(p_idx));
+         }
+      }
+   }
+}
+{%- endfor %}
+
+} //namespace data
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/python/mesa_pd/templates/kernel/InsertParticleIntoLinkedCells.templ.h b/python/mesa_pd/templates/kernel/InsertParticleIntoLinkedCells.templ.h
index ac2bcc5575aaf077c8d878ae701acac723177f95..d80b9cfd81cde268f01336e326f85a851fd518e4 100644
--- a/python/mesa_pd/templates/kernel/InsertParticleIntoLinkedCells.templ.h
+++ b/python/mesa_pd/templates/kernel/InsertParticleIntoLinkedCells.templ.h
@@ -84,8 +84,8 @@ inline void InsertParticleIntoLinkedCells::operator()(const size_t p_idx, Access
       if (hash{{dim}} < 0) hash{{dim}} = 0;
       if (hash{{dim}} >= lc.numCellsPerDim_[{{dim}}]) hash{{dim}} = lc.numCellsPerDim_[{{dim}}] - 1;
       {%- endfor %}
-      int cell_idx = getCellIdx(lc, hash0, hash1, hash2);
-      ac.setNextParticle(p_idx, lc.cells_[uint_c(cell_idx)].exchange(int_c(p_idx)));
+      uint_t cell_idx = getCellIdx(lc, hash0, hash1, hash2);
+      ac.setNextParticle(p_idx, lc.cells_[cell_idx].exchange(int_c(p_idx)));
    }
 }
 
diff --git a/python/mesa_pd/templates/kernel/InsertParticleIntoSparseLinkedCells.templ.h b/python/mesa_pd/templates/kernel/InsertParticleIntoSparseLinkedCells.templ.h
new file mode 100644
index 0000000000000000000000000000000000000000..ca9f6ba0121463368feb787547d73603f9b29513
--- /dev/null
+++ b/python/mesa_pd/templates/kernel/InsertParticleIntoSparseLinkedCells.templ.h
@@ -0,0 +1,98 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InsertParticleIntoSparseLinkedCells.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/SparseLinkedCells.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+ * Inserts a particle into the data::SparseLinkedCells data structure
+ *
+ * \attention Call data::SparseLinkedCells::clear() on the data structure before
+ * reinserting particles.
+ *
+ * This kernel requires the following particle accessor interface
+ * \code
+   {%- for prop in interface %}
+   {%- if 'g' in prop.access %}
+ * const {{prop.type}}& get{{prop.name | capFirst}}(const size_t p_idx) const;
+   {%- endif %}
+   {%- if 's' in prop.access %}
+ * void set{{prop.name | capFirst}}(const size_t p_idx, const {{prop.type}}& v);
+   {%- endif %}
+   {%- if 'r' in prop.access %}
+ * {{prop.type}}& get{{prop.name | capFirst}}Ref(const size_t p_idx);
+   {%- endif %}
+ *
+   {%- endfor %}
+ * \endcode
+ * \ingroup mesa_pd_kernel
+ */
+class InsertParticleIntoSparseLinkedCells
+{
+public:
+   template <typename Accessor>
+   void operator()(const size_t p_idx, Accessor& ac, data::SparseLinkedCells& lc) const; ///< inserts particle \p p_idx, accessed through \p ac, into \p lc
+};
+
+template <typename Accessor>
+inline void InsertParticleIntoSparseLinkedCells::operator()(const size_t p_idx, Accessor& ac, data::SparseLinkedCells& lc) const
+{
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+
+   const auto& minCorner = lc.domain_.minCorner();
+   if (data::particle_flags::isSet(ac.getFlags(p_idx), data::particle_flags::INFINITE))
+   {
+      ac.setNextParticle(p_idx, lc.infiniteParticles_.exchange(int_c(p_idx))); // p_idx becomes the new list head, the old head its successor
+   } else
+   {
+      {%- for dim in range(3) %}
+      int hash{{dim}} = static_cast<int>(std::floor((ac.getPosition(p_idx)[{{dim}}] - minCorner[{{dim}}]) * lc.invCellDiameter_[{{dim}}]));
+      {%- endfor %}
+      {%- for dim in range(3) %}
+      if (hash{{dim}} < 0) hash{{dim}} = 0;
+      if (hash{{dim}} >= lc.numCellsPerDim_[{{dim}}]) hash{{dim}} = lc.numCellsPerDim_[{{dim}}] - 1;
+      {%- endfor %}
+      uint64_t cell_idx = getCellIdx(lc, hash0, hash1, hash2); // coordinates were clamped to the grid above
+      ac.setNextParticle(p_idx, lc.cells_[cell_idx].exchange(int_c(p_idx))); // p_idx becomes the new cell head, the old head its successor
+      if (ac.getNextParticle(p_idx) == -1) // the old head was -1, i.e. the cell was empty until now
+      {
+         lc.nonEmptyCells_.emplace_back(cell_idx); // NOTE(review): plain std::vector append, unlike the atomic exchange above; confirm this kernel is never run concurrently
+      }
+   }
+}
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/src/core/math/RotationMatrix.h b/src/core/math/RotationMatrix.h
deleted file mode 100644
index 8a79b15de0cd3cfcbd0554a18123353a7b293667..0000000000000000000000000000000000000000
--- a/src/core/math/RotationMatrix.h
+++ /dev/null
@@ -1,1448 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file RotationMatrix.h
-//! \ingroup core
-//! \author Klaus Iglberger
-//! \author Sebastian Eibl <sebastian.eibl@fau.de>
-//! \brief Implementation of a 3x3 rotation matrix
-//
-//======================================================================================================================
-
-#pragma once
-
-
-//*************************************************************************************************
-// Includes
-//*************************************************************************************************
-
-#include "core/math/MathTrait.h"
-#include "core/math/Matrix3.h"
-#include "core/math/Vector3.h"
-#include "core/DataTypes.h"
-#include "core/debug/Debug.h"
-
-#include <algorithm>
-#include <cmath>
-#include <ostream>
-#include <limits>
-#include <type_traits>
-
-namespace walberla {
-namespace math {
-
-//=================================================================================================
-//
-//  NAMESPACE FORWARD DECLARATIONS
-//
-//=================================================================================================
-
-template< typename >       class  Quaternion;
-
-
-//=================================================================================================
-//
-//  EULER ROTATIONS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Order of the Euler rotation
- * \ingroup math
- *
- * This codes are needed for the EulerAngles function in order to calculate the Euler angles
- * for a specific combination of rotations.
- */
-enum EulerRotation {
-   XYZs =  0,  //!< Rotation order x, y, z in a static frame.
-   ZYXr =  1,  //!< Rotation order z, y, x in a rotating frame.
-   XYXs =  2,  //!< Rotation order x, y, x in a static frame.
-   XYXr =  3,  //!< Rotation order x, y, z in a rotating frame.
-   XZYs =  4,  //!< Rotation order x, z, y in a static frame.
-   YZXr =  5,  //!< Rotation order y, z, x in a rotating frame.
-   XZXs =  6,  //!< Rotation order x, z, x in a static frame.
-   XZXr =  7,  //!< Rotation order x, z, x in a rotating frame.
-   YZXs =  8,  //!< Rotation order y, z, x in a static frame.
-   XZYr =  9,  //!< Rotation order x, z, y in a rotating frame.
-   YZYs = 10,  //!< Rotation order y, z, y in a static frame.
-   YZYr = 11,  //!< Rotation order y, z, y in a rotating frame.
-   YXZs = 12,  //!< Rotation order y, x, z in a static frame.
-   ZXYr = 13,  //!< Rotation order z, x, y in a rotating frame.
-   YXYs = 14,  //!< Rotation order y, x, y in a static frame.
-   YXYr = 15,  //!< Rotation order y, x, y in a rotating frame.
-   ZXYs = 16,  //!< Rotation order z, x, y in a static frame.
-   YXZr = 17,  //!< Rotation order y, x, z in a rotating frame.
-   ZXZs = 18,  //!< Rotation order z, x, z in a static frame.
-   ZXZr = 19,  //!< Rotation order z, x, z in a rotating frame.
-   ZYXs = 20,  //!< Rotation order z, y, x in a static frame.
-   XYZr = 21,  //!< Rotation order x, y, z in a rotating frame.
-   ZYZs = 22,  //!< Rotation order z, y, z in a static frame.
-   ZYZr = 23   //!< Rotation order z, y, z in a rotating frame.
-};
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  CLASS DEFINITION
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Efficient, generic implementation of a 3x3 rotation matrix.
- *
- * The RotationMatrix class is the representation of a 3x3 rotation matrix with a total of 9
- * statically allocated elements of arbitrary type. The naming convention of the elements is
- * as following:
-
-                          \f[\left(\begin{array}{*{3}{c}}
-                          xx & xy & xz \\
-                          yx & yy & yz \\
-                          zx & zy & zz \\
-                          \end{array}\right)\f]\n
-
- * These elements can be accessed directly with the 1D subscript operator or with the 2D function
- * operator. The numbering of the matrix elements is
-
-                          \f[\left(\begin{array}{*{3}{c}}
-                          0 & 1 & 2 \\
-                          3 & 4 & 5 \\
-                          6 & 7 & 8 \\
-                          \end{array}\right)\f]
-
- * \b Note: The RotationMatrix class can only be instantiated for non-cv-qualified floating point
- * types! Therefore the only possible RotationMatrix instantiations are
- *
- *  - RotationMatrix<float>
- *  - RotationMatrix<double>
- *  - RotationMatrix<long double>
- *
- * The attempt to create a rotation matrix with an integral data type results in a compile time
- * error.
- */
-template< typename Type >  // Data type of the rotation matrix
-class RotationMatrix : public Matrix3<Type>
-{
-   //**Compile time checks*************************************************************************
-   /*! \cond internal */
-   static_assert(std::is_floating_point<Type>::value, "T has to be floating point!");
-   static_assert(!std::is_const<Type>::value, "T has to be non const!");
-   static_assert(!std::is_volatile<Type>::value, "T has to be non volatile!");
-   /*! \endcond */
-   //**********************************************************************************************
-
-public:
-   //**Type definitions****************************************************************************
-   typedef RotationMatrix<Type>   This;           //!< Type of this RotationMatrix instance.
-   typedef This                   ResultType;     //!< Result type for expression template evaluations.
-   typedef Type                   ElementType;    //!< Type of the matrix elements.
-   typedef const RotationMatrix&  CompositeType;  //!< Data type for composite expression templates.
-   //**********************************************************************************************
-
-   //**Constructors********************************************************************************
-   /*!\name Constructors */
-   //@{
-   explicit inline RotationMatrix();
-
-   template< typename Axis >
-   explicit RotationMatrix( Vector3<Axis> axis, Type angle );
-
-   inline RotationMatrix( const RotationMatrix& m );
-
-   template< typename Other >
-   inline RotationMatrix( const RotationMatrix<Other>& m );
-   //@}
-   //**********************************************************************************************
-
-   //**Destructor**********************************************************************************
-   // No explicitly declared destructor.
-   //**********************************************************************************************
-
-   //**Operators***********************************************************************************
-   /*!\name Operators */
-   //@{
-                              inline RotationMatrix& operator= ( const RotationMatrix& rhs );
-   template< typename Other > inline RotationMatrix& operator= ( const RotationMatrix<Other>& rhs );
-                              inline Type            operator[]( size_t index )                    const;
-                              inline Type            operator()( size_t i, size_t j )              const;
-   template< typename Other > inline RotationMatrix& operator*=( const RotationMatrix<Other>& rhs );
-   //@}
-   //**********************************************************************************************
-
-   //**Utility functions***************************************************************************
-   /*!\name Utility functions */
-   //@{
-                              inline size_t               rows()           const;
-                              inline size_t               columns()        const;
-                              inline void                 reset();
-                              inline Type                 getDeterminant() const;
-                              inline RotationMatrix&      transpose();
-                              inline RotationMatrix&      invert();
-                              inline const RotationMatrix getInverse()     const;
-                              inline void                 swap( RotationMatrix& m ) /* throw() */;
-   //@}
-   //**********************************************************************************************
-
-   //**Expression template evaluation functions****************************************************
-   /*!\name Expression template evaluation functions */
-   //@{
-   template< typename Other > inline bool isAliased( const Other* alias ) const;
-   //@}
-   //**********************************************************************************************
-
-   //**Math functions******************************************************************************
-   /*!\name Math functions
-   //
-   // The return type of the math functions depends on the involved data types of the
-   // matrices and vectors (for further detail see the MathTrait class description).
-   */
-   //@{
-   template< typename Other >
-   inline const Matrix3< typename MathTrait<Type,Other>::MultType > rotate( const Matrix3<Other>& m ) const;
-
-   template< typename Other >
-   inline const Matrix3< typename MathTrait<Type,Other>::MultType > diagRotate( const Matrix3<Other>& m ) const;
-   //@}
-   //**********************************************************************************************
-
-   //**Euler rotations*****************************************************************************
-   /*!\name Euler rotations
-   //
-   // For the classification of the Euler rotation, the following characteristics are
-   // defined:\n
-   //  - Inner axis: the inner axis is the axis of the first rotation matrix multiplied
-   //    to a vector.
-   //  - Parity: the parity is even, if the inner axis X is followed by the middle axis
-   //    Y, or Y is followed by Z, or Z is followed by X; otherwise parity is odd.
-   //  - Repetition: repetition tells, if the first and last axes are the same or different.
-   //  - Frame: the frame refers to the frame from which the Euler angles are calculated.
-   //
-   // Altogether, there are 24 possible Euler rotations. The possibilities are consisting
-   // of the choice of the inner axis (X,Y or Z), the parity (even or odd), repetition
-   // (yes or no) and the frame (static or rotating). E.g., an Euler order of XYZs stands
-   // for the rotation order of x-, y- and z-axis in a static frame (inner axis: X, parity:
-   // even, repetition: no, frame: static), whereas YXYr stands for the rotation order y-,
-   // x- and y-axis in a rotating frame ( inner axis: Y, parity: odd, repetition: yes,
-   // frame: rotating).
-   */
-   //@{
-   inline const Vector3<Type> getEulerAnglesXYZ()                   const;
-          const Vector3<Type> getEulerAngles( EulerRotation order ) const;
-   //@}
-   //**********************************************************************************************
-
-private:
-   //**Constructors********************************************************************************
-   /*!\name Constructors */
-   //@{
-   explicit inline RotationMatrix( Type xx, Type xy, Type xz,
-                                   Type yx, Type yy, Type yz,
-                                   Type zx, Type zy, Type zz );
-   //@}
-   //**********************************************************************************************
-
-   //**Member variables****************************************************************************
-   /*!\name Member variables */
-   //@{
-   Type v_[9];  //!< The nine statically allocated matrix elements.
-                /*!< Access to the matrix elements is gained via the subscript or function call
-                     operator. The order of the elements is
-                     \f[\left(\begin{array}{*{3}{c}}
-                     0 & 1 & 2 \\
-                     3 & 4 & 5 \\
-                     6 & 7 & 8 \\
-                     \end{array}\right)\f] */
-   //@}
-   //**********************************************************************************************
-
-   //**Friend declarations*************************************************************************
-   /*! \cond internal */
-   template< typename Other > friend class Quaternion;
-
-   template< typename Other >
-   friend const RotationMatrix<Other> trans( const RotationMatrix<Other>& m );
-
-   template< typename T1, typename T2 >
-   friend const RotationMatrix< typename MathTrait<T1,T2>::MultType >
-      operator*( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs );
-   /*! \endcond */
-   //**********************************************************************************************
-};
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  CONSTRUCTORS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief The default constructor for RotationMatrix.
- *
- * The diagonal matrix elements are initialized with 1, all other elements are initialized
- * with 0.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline RotationMatrix<Type>::RotationMatrix()
-{
-   v_[0] = v_[4] = v_[8] = Type(1);
-   v_[1] = v_[2] = v_[3] = v_[5] = v_[6] = v_[7] = Type(0);
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Rotation matrix constructor.
- *
- * \param axis The rotation axis.
- * \param angle The rotation angle (radian measure).
- *
- * This constructor creates a rotation matrix from the rotation axis \a axis and the rotation
- * angle \a angle. \a axis may be an arbitrary, non-zero vector of any length. However, it is
- * allowed to use the zero vector (0,0,0) in combination with an angle of 0. This combination
- * results in the default rotation matrix
-
-                          \f[\left(\begin{array}{*{3}{c}}
-                          1 & 0 & 0 \\
-                          0 & 1 & 0 \\
-                          0 & 0 & 1 \\
-                          \end{array}\right)\f]
- */
-template< typename Type >  // Data type of the rotation matrix
-template< typename Axis >  // Data type of the rotation axis
-RotationMatrix<Type>::RotationMatrix( Vector3<Axis> axis, Type angle )
-{
-   static_assert(std::is_floating_point<Axis>::value, "Axis has to be floating point!");
-
-   WALBERLA_ASSERT( ( axis.sqrLength() > Axis(0) || angle == Type(0) ), "Invalid matrix parameters" );
-
-   const Type sina( std::sin(angle) );
-   const Type cosa( std::cos(angle) );
-   const Type tmp( Type(1)-cosa );
-
-   axis.normalize();
-
-   v_[0] = cosa + axis[0]*axis[0]*tmp;
-   v_[1] = axis[0]*axis[1]*tmp - axis[2]*sina;
-   v_[2] = axis[0]*axis[2]*tmp + axis[1]*sina;
-   v_[3] = axis[1]*axis[0]*tmp + axis[2]*sina;
-   v_[4] = cosa + axis[1]*axis[1]*tmp;
-   v_[5] = axis[1]*axis[2]*tmp - axis[0]*sina;
-   v_[6] = axis[2]*axis[0]*tmp - axis[1]*sina;
-   v_[7] = axis[2]*axis[1]*tmp + axis[0]*sina;
-   v_[8] = cosa + axis[2]*axis[2]*tmp;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief The copy constructor for RotationMatrix.
- *
- * \param m Rotation matrix to be copied.
- *
- * The copy constructor is explicitly defined in order to enable/facilitate NRV optimization.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline RotationMatrix<Type>::RotationMatrix( const RotationMatrix& m )
-{
-   v_[0] = m.v_[0];
-   v_[1] = m.v_[1];
-   v_[2] = m.v_[2];
-   v_[3] = m.v_[3];
-   v_[4] = m.v_[4];
-   v_[5] = m.v_[5];
-   v_[6] = m.v_[6];
-   v_[7] = m.v_[7];
-   v_[8] = m.v_[8];
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Conversion constructor from different RotationMatrix instances.
- *
- * \param m Rotation matrix to be copied.
- */
-template< typename Type >   // Data type of the rotation matrix
-template< typename Other >  // Data type of the foreign rotation matrix
-inline RotationMatrix<Type>::RotationMatrix( const RotationMatrix<Other>& m )
-{
-   v_[0] = m[0];
-   v_[1] = m[1];
-   v_[2] = m[2];
-   v_[3] = m[3];
-   v_[4] = m[4];
-   v_[5] = m[5];
-   v_[6] = m[6];
-   v_[7] = m[7];
-   v_[8] = m[8];
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Constructor for a direct initialization of all rotation matrix elements.
- *
- * \param xx The initial value for the xx-component.
- * \param xy The initial value for the xy-component.
- * \param xz The initial value for the xz-component.
- * \param yx The initial value for the yx-component.
- * \param yy The initial value for the yy-component.
- * \param yz The initial value for the yz-component.
- * \param zx The initial value for the zx-component.
- * \param zy The initial value for the zy-component.
- * \param zz The initial value for the zz-component.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline RotationMatrix<Type>::RotationMatrix( Type xx, Type xy, Type xz,
-                                             Type yx, Type yy, Type yz,
-                                             Type zx, Type zy, Type zz )
-{
-   v_[0] = xx; v_[1] = xy; v_[2] = xz;
-   v_[3] = yx; v_[4] = yy; v_[5] = yz;
-   v_[6] = zx; v_[7] = zy; v_[8] = zz;
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  OPERATORS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Copy assignment operator for RotationMatrix.
- *
- * \param rhs Rotation matrix to be copied.
- * \return Reference to the assigned rotation matrix.
- *
- * Explicit definition of a copy assignment operator for performance reasons.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline RotationMatrix<Type>& RotationMatrix<Type>::operator=( const RotationMatrix& rhs )
-{
-   // This implementation is faster than the synthesized default copy assignment operator and
-   // faster than an implementation with the C library function 'memcpy' in combination with a
-   // protection against self-assignment. Additionally, this version goes without a protection
-   // against self-assignment.
-   v_[0] = rhs.v_[0];
-   v_[1] = rhs.v_[1];
-   v_[2] = rhs.v_[2];
-   v_[3] = rhs.v_[3];
-   v_[4] = rhs.v_[4];
-   v_[5] = rhs.v_[5];
-   v_[6] = rhs.v_[6];
-   v_[7] = rhs.v_[7];
-   v_[8] = rhs.v_[8];
-   return *this;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Assignment operator for different RotationMatrix instances.
- *
- * \param rhs Rotation matrix to be copied.
- * \return Reference to the assigned rotation matrix.
- */
-template< typename Type >   // Data type of the rotation matrix
-template< typename Other >  // Data type of the foreign rotation matrix
-inline RotationMatrix<Type>& RotationMatrix<Type>::operator=( const RotationMatrix<Other>& rhs )
-{
-   // This implementation is faster than the synthesized default copy assignment operator and
-   // faster than an implementation with the C library function 'memcpy' in combination with a
-   // protection against self-assignment. Additionally, this version goes without a protection
-   // against self-assignment.
-   v_[0] = rhs[0];
-   v_[1] = rhs[1];
-   v_[2] = rhs[2];
-   v_[3] = rhs[3];
-   v_[4] = rhs[4];
-   v_[5] = rhs[5];
-   v_[6] = rhs[6];
-   v_[7] = rhs[7];
-   v_[8] = rhs[8];
-   return *this;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief 1D-access to the rotation matrix elements.
- *
- * \param index Access index. The index has to be in the range \f$[0..8]\f$.
- * \return Copy of the accessed element.
- *
- * In case WALBERLA_ASSERT() is active, this operator performs an index check.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline Type RotationMatrix<Type>::operator[]( size_t index ) const
-{
-   WALBERLA_ASSERT( index < 9, "Invalid rotation matrix access index" );
-   return v_[index];
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief 2D-access to the rotation matrix elements.
- *
- * \param i Access index for the row. The index has to be in the range [0..2].
- * \param j Access index for the column. The index has to be in the range [0..2].
- * \return Copy of the accessed element.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline Type RotationMatrix<Type>::operator()( size_t i, size_t j ) const
-{
-   WALBERLA_ASSERT( i<3 && j<3, "Invalid rotation matrix access index" );
-   return v_[i*3+j];
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Multiplication assignment operator for the multiplication between two rotation matrices
- *        (\f$ A*=B \f$).
- *
- * \param rhs The right-hand side rotation matrix for the multiplication.
- * \return Reference to the rotation matrix.
- */
-template< typename Type >   // Data type of the rotation matrix
-template< typename Other >  // Data type of the right-hand side rotation matrix
-inline RotationMatrix<Type>& RotationMatrix<Type>::operator*=( const RotationMatrix<Other>& rhs )
-{
-   // Creating a temporary due to data dependencies
-   const RotationMatrix tmp( v_[0]*rhs[0] + v_[1]*rhs[3] + v_[2]*rhs[6],
-                             v_[0]*rhs[1] + v_[1]*rhs[4] + v_[2]*rhs[7],
-                             v_[0]*rhs[2] + v_[1]*rhs[5] + v_[2]*rhs[8],
-                             v_[3]*rhs[0] + v_[4]*rhs[3] + v_[5]*rhs[6],
-                             v_[3]*rhs[1] + v_[4]*rhs[4] + v_[5]*rhs[7],
-                             v_[3]*rhs[2] + v_[4]*rhs[5] + v_[5]*rhs[8],
-                             v_[6]*rhs[0] + v_[7]*rhs[3] + v_[8]*rhs[6],
-                             v_[6]*rhs[1] + v_[7]*rhs[4] + v_[8]*rhs[7],
-                             v_[6]*rhs[2] + v_[7]*rhs[5] + v_[8]*rhs[8] );
-
-   return this->operator=( tmp );
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  UTILITY FUNCTIONS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Returns the current number of rows of the rotation matrix.
- *
- * \return The number of rows of the rotation matrix.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline size_t RotationMatrix<Type>::rows() const
-{
-   return size_t(3);
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Returns the current number of columns of the rotation matrix.
- *
- * \return The number of columns of the rotation matrix.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline size_t RotationMatrix<Type>::columns() const
-{
-   return size_t(3);
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Reset to the default initial values.
- *
- * \return void
- *
- * This function resets the rotation matrix to the default initial values:
-
-                          \f[\left(\begin{array}{*{3}{c}}
-                          1 & 0 & 0 \\
-                          0 & 1 & 0 \\
-                          0 & 0 & 1 \\
-                          \end{array}\right)\f]
- */
-template< typename Type >  // Data type of the rotation matrix
-inline void RotationMatrix<Type>::reset()
-{
-   v_[0] = v_[4] = v_[8] = Type(1);
-   v_[1] = v_[2] = v_[3] = v_[5] = v_[6] = v_[7] = Type(0);
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Calculation of the determinant of the rotation matrix.
- *
- * \return The determinant of the rotation matrix.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline Type RotationMatrix<Type>::getDeterminant() const
-{
-   // Although the determinant of a rotation matrix should always be exactly one, the
-   // function calculates the actual determinant to enable checks.
-   return v_[0]*v_[4]*v_[8] + v_[1]*v_[5]*v_[6] + v_[2]*v_[3]*v_[7] -
-          v_[6]*v_[4]*v_[2] - v_[7]*v_[5]*v_[0] - v_[8]*v_[3]*v_[1];
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Transposing the rotation matrix.
- *
- * \return Reference to the transposed rotation matrix.
- *
- * This function has the same effect as the invert() function (\f$ R^T = R^-1 \f$).
- */
-template< typename Type >  // Data type of the rotation matrix
-inline RotationMatrix<Type>& RotationMatrix<Type>::transpose()
-{
-   std::swap( v_[1], v_[3] );
-   std::swap( v_[2], v_[6] );
-   std::swap( v_[5], v_[7] );
-   return *this;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Inverting the matrix.
- *
- * \return Reference to the inverted matrix.
- *
- * This function has the same effect as the transpose() function (\f$ R^-1 = R^T \f$).
- */
-template< typename Type >  // Data type of the rotation matrix
-inline RotationMatrix<Type>& RotationMatrix<Type>::invert()
-{
-   std::swap( v_[1], v_[3] );
-   std::swap( v_[2], v_[6] );
-   std::swap( v_[5], v_[7] );
-   return *this;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Calculation of the inverse of the matrix.
- *
- * \return The inverse of the matrix.
- *
- * This function has the same effect as the trans() function (\f$ R^-1 = R^T \f$).
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const RotationMatrix<Type> RotationMatrix<Type>::getInverse() const
-{
-   return RotationMatrix( v_[0], v_[3], v_[6], v_[1], v_[4], v_[7], v_[2], v_[5], v_[8] );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Swapping the contents of two 3x3 matrices.
- *
- * \param m The matrix to be swapped.
- * \return void
- * \exception no-throw guarantee.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline void RotationMatrix<Type>::swap( RotationMatrix& m ) /* throw() */
-{
-   std::swap( v_[0], m.v_[0] );
-   std::swap( v_[1], m.v_[1] );
-   std::swap( v_[2], m.v_[2] );
-   std::swap( v_[3], m.v_[3] );
-   std::swap( v_[4], m.v_[4] );
-   std::swap( v_[5], m.v_[5] );
-   std::swap( v_[6], m.v_[6] );
-   std::swap( v_[7], m.v_[7] );
-   std::swap( v_[8], m.v_[8] );
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  EXPRESSION TEMPLATE EVALUATION FUNCTIONS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Returns whether the rotation matrix is aliased with the given address \a alias.
- *
- * \param alias The alias to be checked.
- * \return \a true in case the alias corresponds to this matrix, \a false if not.
- */
-template< typename Type >   // Data type of the matrix
-template< typename Other >  // Data type of the foreign expression
-inline bool RotationMatrix<Type>::isAliased( const Other* alias ) const
-{
-   return static_cast<const void*>( this ) == static_cast<const void*>( alias );
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  MATH FUNCTIONS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Rotation of a matrix M (\f$ ROT=R*M*R^{-1} \f$).
- *
- * \param m The matrix to be rotated.
- * \return The rotated matrix.
- *
- * The function is selected for matrices of different data type (in case \a Type and \a Other
- * are supported by the MathTrait class). The function returns a matrix of the higher-order
- * data type of the two involved data types.
- *
- * \b Note: This function is only defined for matrices of floating point type. The attempt to
- * use this function with matrices of integral data type will result in a compile time error.
- */
-template< typename Type >   // Data type of the rotation matrix
-template< typename Other >  // Data type of the standard matrix
-inline const Matrix3< typename MathTrait<Type,Other>::MultType >
-   RotationMatrix<Type>::rotate( const Matrix3<Other>& m ) const
-{
-   static_assert(std::is_floating_point<Other>::value, "Other has to be floating point!");
-
-   typedef typename MathTrait<Type,Other>::MultType  MT;
-
-   //--Multiplication in two steps (number of FLOP = 90, 1 additional temporary matrix)------------
-
-   // Precalculation of tmp = m * R(-1)
-   const Matrix3<MT> tmp( m.v_[0]*v_[0] + m.v_[1]*v_[1] + m.v_[2]*v_[2],
-                            m.v_[0]*v_[3] + m.v_[1]*v_[4] + m.v_[2]*v_[5],
-                            m.v_[0]*v_[6] + m.v_[1]*v_[7] + m.v_[2]*v_[8],
-                            m.v_[3]*v_[0] + m.v_[4]*v_[1] + m.v_[5]*v_[2],
-                            m.v_[3]*v_[3] + m.v_[4]*v_[4] + m.v_[5]*v_[5],
-                            m.v_[3]*v_[6] + m.v_[4]*v_[7] + m.v_[5]*v_[8],
-                            m.v_[6]*v_[0] + m.v_[7]*v_[1] + m.v_[8]*v_[2],
-                            m.v_[6]*v_[3] + m.v_[7]*v_[4] + m.v_[8]*v_[5],
-                            m.v_[6]*v_[6] + m.v_[7]*v_[7] + m.v_[8]*v_[8] );
-
-   // Calculating ROT = R * tmp
-   return Matrix3<MT>( v_[0]*tmp.v_[0] + v_[1]*tmp.v_[3] + v_[2]*tmp.v_[6],
-                         v_[0]*tmp.v_[1] + v_[1]*tmp.v_[4] + v_[2]*tmp.v_[7],
-                         v_[0]*tmp.v_[2] + v_[1]*tmp.v_[5] + v_[2]*tmp.v_[8],
-                         v_[3]*tmp.v_[0] + v_[4]*tmp.v_[3] + v_[5]*tmp.v_[6],
-                         v_[3]*tmp.v_[1] + v_[4]*tmp.v_[4] + v_[5]*tmp.v_[7],
-                         v_[3]*tmp.v_[2] + v_[4]*tmp.v_[5] + v_[5]*tmp.v_[8],
-                         v_[6]*tmp.v_[0] + v_[7]*tmp.v_[3] + v_[8]*tmp.v_[6],
-                         v_[6]*tmp.v_[1] + v_[7]*tmp.v_[4] + v_[8]*tmp.v_[7],
-                         v_[6]*tmp.v_[2] + v_[7]*tmp.v_[5] + v_[8]*tmp.v_[8] );
-
-   //--Multiplication in one step (number of FLOP = 180, no additional temporary matrix)-----------
-   /*
-   return Matrix3<MT>( m.v_[0]*v_[0]*v_[0] + m.v_[4]*v_[1]*v_[1] + m.v_[8]*v_[2]*v_[2] + v_[0]*v_[1]*( m.v_[1]+m.v_[3] ) + v_[0]*v_[2]*( m.v_[2]+m.v_[6] ) + v_[1]*v_[2]*( m.v_[5]+m.v_[7] ),
-                         v_[0]*( m.v_[0]*v_[3] + m.v_[1]*v_[4] + m.v_[2]*v_[5] ) + v_[1]*( m.v_[3]*v_[3] + m.v_[4]*v_[4] + m.v_[5]*v_[5] ) + v_[2]*( m.v_[6]*v_[3] + m.v_[7]*v_[4] + m.v_[8]*v_[5] ),
-                         v_[0]*( m.v_[0]*v_[6] + m.v_[1]*v_[7] + m.v_[2]*v_[8] ) + v_[1]*( m.v_[3]*v_[6] + m.v_[4]*v_[7] + m.v_[5]*v_[8] ) + v_[2]*( m.v_[6]*v_[6] + m.v_[7]*v_[7] + m.v_[8]*v_[8] ),
-                         v_[3]*( m.v_[0]*v_[0] + m.v_[1]*v_[1] + m.v_[2]*v_[2] ) + v_[4]*( m.v_[3]*v_[0] + m.v_[4]*v_[1] + m.v_[5]*v_[2] ) + v_[5]*( m.v_[6]*v_[0] + m.v_[7]*v_[1] + m.v_[8]*v_[2] ),
-                         m.v_[0]*v_[3]*v_[3] + m.v_[4]*v_[4]*v_[4] + m.v_[8]*v_[5]*v_[5] + v_[3]*v_[4]*( m.v_[1]+m.v_[3] ) + v_[3]*v_[5]*( m.v_[2]+m.v_[6] ) + v_[4]*v_[5]*( m.v_[5]+m.v_[7] ),
-                         v_[3]*( m.v_[0]*v_[6] + m.v_[1]*v_[7] + m.v_[2]*v_[8] ) + v_[4]*( m.v_[3]*v_[6] + m.v_[4]*v_[7] + m.v_[5]*v_[8] ) + v_[5]*( m.v_[6]*v_[6] + m.v_[7]*v_[7] + m.v_[8]*v_[8] ),
-                         v_[6]*( m.v_[0]*v_[0] + m.v_[1]*v_[1] + m.v_[2]*v_[2] ) + v_[7]*( m.v_[3]*v_[0] + m.v_[4]*v_[1] + m.v_[5]*v_[2] ) + v_[8]*( m.v_[6]*v_[0] + m.v_[7]*v_[1] + m.v_[8]*v_[2] ),
-                         v_[6]*( m.v_[0]*v_[3] + m.v_[1]*v_[4] + m.v_[2]*v_[5] ) + v_[7]*( m.v_[3]*v_[3] + m.v_[4]*v_[4] + m.v_[5]*v_[5] ) + v_[8]*( m.v_[6]*v_[3] + m.v_[7]*v_[4] + m.v_[8]*v_[5] ),
-                         m.v_[0]*v_[6]*v_[6] + m.v_[4]*v_[7]*v_[7] + m.v_[8]*v_[8]*v_[8] + v_[6]*v_[7]*( m.v_[1]+m.v_[3] ) + v_[6]*v_[8]*( m.v_[2]+m.v_[6] ) + v_[7]*v_[8]*( m.v_[5]+m.v_[7] ) );
-   */
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Rotation of a diagonal matrix M (\f$ ROT=R*M*R^{-1} \f$).
- *
- * \param m The diagonal matrix to be rotated.
- * \return The rotated matrix.
- *
- * The DiagRotate function is a special case of the rotate function. The matrix is assumed to
- * be a diagonal matrix, which reduces the number of floating point operations of the rotation.
- * The function is selected for matrices of different data type (in case \a Type and \a Other
- * are supported by the MathTrait class). The function returns a matrix of the higher-order
- * data type of the two involved data types.
- *
- * \b Note: This function is only defined for matrices of floating point type. The attempt to
- * use this function with matrices of integral data type will result in a compile time error.
- */
-template< typename Type >   // Data type of the rotation matrix
-template< typename Other >  // Data type of the diagonal standard matrix
-inline const Matrix3< typename MathTrait<Type,Other>::MultType >
-   RotationMatrix<Type>::diagRotate( const Matrix3<Other>& m ) const
-{
-   static_assert(std::is_floating_point<Other>::value, "Other has to be floating point!");
-
-   typedef typename MathTrait<Type,Other>::MultType  MT;
-
-   // Precalculating tmp = m * R(-1)
-   const Matrix3<MT> tmp( m.v_[0]*v_[0], m.v_[0]*v_[3], m.v_[0]*v_[6],
-                            m.v_[4]*v_[1], m.v_[4]*v_[4], m.v_[4]*v_[7],
-                            m.v_[8]*v_[2], m.v_[8]*v_[5], m.v_[8]*v_[8] );
-
-   // Calculating ROT = R * tmp
-   return Matrix3<MT>( v_[0]*tmp.v_[0] + v_[1]*tmp.v_[3] + v_[2]*tmp.v_[6],
-                         v_[0]*tmp.v_[1] + v_[1]*tmp.v_[4] + v_[2]*tmp.v_[7],
-                         v_[0]*tmp.v_[2] + v_[1]*tmp.v_[5] + v_[2]*tmp.v_[8],
-                         v_[3]*tmp.v_[0] + v_[4]*tmp.v_[3] + v_[5]*tmp.v_[6],
-                         v_[3]*tmp.v_[1] + v_[4]*tmp.v_[4] + v_[5]*tmp.v_[7],
-                         v_[3]*tmp.v_[2] + v_[4]*tmp.v_[5] + v_[5]*tmp.v_[8],
-                         v_[6]*tmp.v_[0] + v_[7]*tmp.v_[3] + v_[8]*tmp.v_[6],
-                         v_[6]*tmp.v_[1] + v_[7]*tmp.v_[4] + v_[8]*tmp.v_[7],
-                         v_[6]*tmp.v_[2] + v_[7]*tmp.v_[5] + v_[8]*tmp.v_[8] );
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  EULER ROTATIONS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Calculation of the Euler angles (in radian measure).
- *
- * \return The Euler angles for a rotation order of x, y, z (radian measure).
- *
- * The Euler angles are calculated for a rotation order of x-, y- and z-axis.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const Vector3<Type> RotationMatrix<Type>::getEulerAnglesXYZ() const
-{
-   const Type cy( std::sqrt( v_[0]*v_[0] + v_[3]*v_[3] ) );
-
-   if( cy > Limits<real_t>::accuracy() ) {
-      return Vector3<Type>( std::atan2( v_[7], v_[8] ), std::atan2( -v_[6], cy ), std::atan2( v_[3], v_[0] ) );
-   }
-   else {
-      return Vector3<Type>( std::atan2( -v_[5], v_[4] ), std::atan2( -v_[6], cy ), Type(0) );
-   }
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Calculation of the Euler angles for a specific rotation order.
- *
- * \param order The specific rotation order.
- * \return The specific Euler angles (radian measure).
- */
-template< typename Type >  // Data type of the rotation matrix
-const Vector3<Type> RotationMatrix<Type>::getEulerAngles( EulerRotation order ) const
-{
-   static const unsigned int eulSafe[4] = { 0, 1, 2, 0 };
-   static const unsigned int eulNext[4] = { 1, 2, 0, 1 };
-
-   Vector3<Type> ea;
-
-   // Unpacking the euler order
-   const unsigned int frame( order&1 );
-   const unsigned int repetition( (order&2)>>1 );
-   const unsigned int parity( (order&4)>>2 );
-   const unsigned int i( eulSafe[(order&24)>>3] );
-   const unsigned int j( eulNext[i+parity] );
-   const unsigned int k( eulNext[i+1-parity] );
-
-   // Treatment of rotations with repetition
-   if( repetition ) {
-      const Type sy( std::sqrt( v_[i*3+j]*v_[i*3+j] + v_[i*3+k]*v_[i*3+k] ) );
-      if( sy > Limits<real_t>::accuracy() ) {
-         ea[0] = std::atan2( v_[i*3+j], v_[i*3+k] );
-         ea[1] = std::atan2( sy, v_[i*3+i] );
-         ea[2] = std::atan2( v_[j*3+i], -v_[k*3+i] );
-      }
-      else {
-         ea[0] = std::atan2( -v_[j*3+k], v_[j*3+j] );
-         ea[1] = std::atan2( sy, v_[i*3+i] );
-         ea[2] = Type(0);
-      }
-   }
-
-   // Treatment of rotations without repetition
-   else {
-      const Type cy( std::sqrt( v_[i*3+i]*v_[i*3+i] + v_[j*3+i]*v_[j*3+i] ) );
-      if( cy > Limits<real_t>::accuracy() ) {
-         ea[0] = std::atan2( v_[k*3+j], v_[k*3+k] );
-         ea[1] = std::atan2( -v_[k*3+i], cy );
-         ea[2] = std::atan2( v_[j*3+i], v_[i*3+i] );
-      }
-      else {
-         ea[0] = std::atan2( -v_[j*3+k], v_[j*3+j] );
-         ea[1] = std::atan2( -v_[k*3+i], cy );
-         ea[2] = Type(0);
-      }
-   }
-
-   // Treatment of an odd partity
-   if( parity ) {
-      ea[0] = -ea[0];
-      ea[1] = -ea[1];
-      ea[2] = -ea[2];
-   }
-
-   // Treatment of a rotating frame
-   if( frame ) {
-      Type tmp = ea[0];
-      ea[0] = ea[2];
-      ea[2] = tmp;
-   }
-
-   return ea;
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  GLOBAL OPERATORS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\name RotationMatrix operators */
-//@{
-template< typename T1, typename T2 >
-inline bool operator==( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs );
-
-template< typename T1, typename T2 >
-inline bool operator!=( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs );
-
-template< typename Type >
-std::ostream& operator<<( std::ostream& os, const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline bool isnan( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline const Matrix3<Type> abs( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline const Matrix3<Type> fabs( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline void reset( RotationMatrix<Type>& m );
-
-template< typename Type >
-inline void clear( RotationMatrix<Type>& m );
-
-template< typename Type >
-inline bool isDefault( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline const RotationMatrix<Type> trans( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline const RotationMatrix<Type> inv( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline const RotationMatrix<Type> sq( const RotationMatrix<Type>& m );
-
-template< typename Type >
-inline void swap( RotationMatrix<Type>& a, RotationMatrix<Type>& b ) /* throw() */;
-//@}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Equality operator for the comparison of two rotation matrices.
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side rotation matrix for the comparison.
- * \param rhs The right-hand side rotation matrix for the comparison.
- * \return \a true if the two rotation matrices are equal, \a false if not.
- */
-template< typename T1    // Data type of the left-hand side rotation matrix
-        , typename T2 >  // Data type of the right-hand side rotation matrix
-inline bool operator==( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs )
-{
-   // In order to compare the two matrices, the data values of the lower-order data
-   // type are converted to the higher-order data type within the equal function.
-   if( !equal( lhs[0], rhs[0] ) ||
-       !equal( lhs[1], rhs[1] ) ||
-       !equal( lhs[2], rhs[2] ) ||
-       !equal( lhs[3], rhs[3] ) ||
-       !equal( lhs[4], rhs[4] ) ||
-       !equal( lhs[5], rhs[5] ) ||
-       !equal( lhs[6], rhs[6] ) ||
-       !equal( lhs[7], rhs[7] ) ||
-       !equal( lhs[8], rhs[8] ) )
-      return false;
-   else return true;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Inequality operator for the comparison of two rotation matrices.
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side rotation matrix for the comparison.
- * \param rhs The right-hand side rotation matrix for the comparison.
- * \return \a true if the two rotation matrices are not equal, \a false if they are equal.
- */
-template< typename T1    // Data type of the left-hand side rotation matrix
-        , typename T2 >  // Data type of the right-hand side rotation matrix
-inline bool operator!=( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs )
-{
-   return !( lhs == rhs );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Global output operator for 3x3 rotation matrices.
- * \ingroup dense_rotation_matrix
- *
- * \param os Reference to the output stream.
- * \param m Reference to a constant rotation matrix object.
- * \return Reference to the output stream.
- */
-template< typename Type >  // Data type of the rotation matrix
-std::ostream& operator<<( std::ostream& os, const RotationMatrix<Type>& m )
-{
-   return os << " ( " << m[0] << " , " << m[1] << " , " << m[2] << " )\n"
-             << " ( " << m[3] << " , " << m[4] << " , " << m[5] << " )\n"
-             << " ( " << m[6] << " , " << m[7] << " , " << m[8] << " )\n";
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Checks the given rotation matrix for not-a-number elements.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be checked for not-a-number elements.
- * \return \a true if at least one element of the matrix is not-a-number, \a false otherwise.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline bool isnan( const RotationMatrix<Type>& m )
-{
-   if( isnan( m[0] ) || isnan( m[1] ) || isnan( m[2] ) ||
-       isnan( m[3] ) || isnan( m[4] ) || isnan( m[5] ) ||
-       isnan( m[6] ) || isnan( m[7] ) || isnan( m[8] ) )
-      return true;
-   else return false;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Returns a matrix containing the absolute values of each single element of \a m.
- * \ingroup dense_rotation_matrix
- *
- * \param m The input rotation matrix.
- * \return The absolute value of each single element of \a m.
- *
- * The \a abs function calculates the absolute value of each element of the input rotation
- * matrix \a m.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const Matrix3<Type> abs( const RotationMatrix<Type>& m )
-{
-   using std::abs;
-   return Matrix3<Type>( abs(m[0]), abs(m[1]), abs(m[2]),
-                           abs(m[3]), abs(m[4]), abs(m[5]),
-                           abs(m[6]), abs(m[7]), abs(m[8]) );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Returns a matrix containing the absolute values of each single element of \a m.
- * \ingroup dense_rotation_matrix
- *
- * \param m The input rotation matrix.
- * \return The absolute value of each single element of \a m.
- *
- * The \a fabs function calculates the absolute value of each element of the input rotation
- * matrix \a m.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const Matrix3<Type> fabs( const RotationMatrix<Type>& m )
-{
-   using std::fabs;
-   return Matrix3<Type>( fabs(m[0]), fabs(m[1]), fabs(m[2]),
-                           fabs(m[3]), fabs(m[4]), fabs(m[5]),
-                           fabs(m[6]), fabs(m[7]), fabs(m[8]) );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Resetting the given rotation matrix.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be resetted.
- * \return void
- */
-template< typename Type >  // Data type of the rotation matrix
-inline void reset( RotationMatrix<Type>& m )
-{
-   m.reset();
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Clearing the given rotation matrix.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be cleared.
- * \return void
- *
- * Clearing a rotation matrix is equivalent to resetting it via the reset() function.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline void clear( RotationMatrix<Type>& m )
-{
-   m.reset();
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Returns whether the given rotation matrix is in default state.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be tested for its default state.
- * \return \a true in case the given matrix is component-wise zero, \a false otherwise.
- */
-template< typename Type >  // Data type of the rotation matrix
-inline bool isDefault( const RotationMatrix<Type>& m )
-{
-   return ( m[0] == Type(1) ) && ( m[1] == Type(0) ) && ( m[2] == Type(0) ) &&
-          ( m[3] == Type(0) ) && ( m[4] == Type(1) ) && ( m[5] == Type(0) ) &&
-          ( m[6] == Type(0) ) && ( m[7] == Type(0) ) && ( m[8] == Type(1) );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Calculation of the transpose of the rotation matrix.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be transposed.
- * \return The transpose of the rotation matrix.
- *
- * This function returns the transpose of the given rotation matrix:
-
-   \code
-   pe::Rot3 R1, R2;
-   // ... Resizing and initialization
-   R1 = trans( R2 );
-   \endcode
-
- * Note that this function has the same effect as the inv() function (\f$ R^T = R^-1 \f$).
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const RotationMatrix<Type> trans( const RotationMatrix<Type>& m )
-{
-   return RotationMatrix<Type>( m[0], m[3], m[6],
-                                m[1], m[4], m[7],
-                                m[2], m[5], m[8] );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Inverting the given rotation matrix.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be inverted.
- * \return The inverse rotation matrix.
- *
- * This function returns the inverse of the given rotation matrix:
-
-   \code
-   pe::Rot3 R1, R2;
-   // ... Resizing and initialization
-   R1 = inv( R2 );
-   \endcode
-
- * Note that this function has the same effect as the trans() function (\f$ R^-1 = R^T \f$).
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const RotationMatrix<Type> inv( const RotationMatrix<Type>& m )
-{
-   return m.getInverse();
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Squaring the given rotation matrix.
- * \ingroup dense_rotation_matrix
- *
- * \param m The rotation matrix to be squared.
- * \return The result of the square operation.
- *
- * This function squares the given rotation matrix \a m. This function has the same effect as
- * multiplying the rotation matrix with itself (\f$ m * m \f$).
- */
-template< typename Type >  // Data type of the rotation matrix
-inline const RotationMatrix<Type> sq( const RotationMatrix<Type>& m )
-{
-   return m * m;
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Swapping the contents of two rotation matrices.
- * \ingroup dense_rotation_matrix
- *
- * \param a The first rotation matrix to be swapped.
- * \param b The second rotation matrix to be swapped.
- * \return void
- * \exception no-throw guarantee.
- */
-template< typename Type >  // Data type of the rotation matrices
-inline void swap( RotationMatrix<Type>& a, RotationMatrix<Type>& b ) /* throw() */
-{
-   a.swap( b );
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  GLOBAL ARITHMETIC OPERATORS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\name RotationMatrix arithmetic operators
- *
- * These operators support operations between matrices of different element types. They work
- * for all element types supported by the MathTrait class template.
- */
-//@{
-template< typename T1, typename T2 >
-inline const Vector3< typename MathTrait<T1,T2>::MultType >
-   operator*( const RotationMatrix<T1>& lhs, const Vector3<T2>& rhs );
-
-//template< typename T1, typename T2 >
-//inline const Vector3< typename MathTrait<T1,T2>::MultType, true >
-//   operator*( const Vector3<T1,true>& lhs, const RotationMatrix<T2>& rhs );
-
-template< typename T1, typename T2 >
-inline const Matrix3< typename MathTrait<T1,T2>::MultType >
-   operator*( const RotationMatrix<T1>& lhs, const Matrix3<T2>& rhs );
-
-template< typename T1, typename T2 >
-inline const Matrix3< typename MathTrait<T1,T2>::MultType >
-   operator*( const Matrix3<T1>& lhs, const RotationMatrix<T2>& rhs );
-
-template< typename T1, typename T2 >
-inline const RotationMatrix< typename MathTrait<T1,T2>::MultType >
-   operator*( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs );
-//@}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Multiplication operator for the multiplication of a rotation matrix and a vector
- *        (\f$ \vec{a}=B*\vec{c} \f$).
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side rotation matrix for the multiplication.
- * \param rhs The right-hand side vector for the multiplication.
- * \return The resulting vector.
- *
- * This operator is selected for multiplications between rotation matrices and vectors of two
- * different data types \a T1 and \a T2, which are supported by the MathTrait class. The operator
- * returns a vector of the higher-order data type of the two involved data types.
- */
-template< typename T1    // Data type of the left-hand side rotation matrix
-        , typename T2 >  // Data type of the right-hand side vector
-inline const Vector3< typename MathTrait<T1,T2>::MultType >
-   operator*( const RotationMatrix<T1>& lhs, const Vector3<T2>& rhs )
-{
-   typedef typename MathTrait<T1,T2>::MultType  MT;
-   return Vector3<MT>( lhs[0]*rhs[0] + lhs[1]*rhs[1] + lhs[2]*rhs[2],
-                             lhs[3]*rhs[0] + lhs[4]*rhs[1] + lhs[5]*rhs[2],
-                             lhs[6]*rhs[0] + lhs[7]*rhs[1] + lhs[8]*rhs[2] );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Multiplication operator for the multiplication of a vector and a rotation matrix
- *        (\f$ \vec{a}=\vec{b}^T*B \f$).
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side transpose vector for the multiplication.
- * \param rhs The right-hand side rotation matrix for the multiplication.
- * \return The resulting vector.
- *
- * This operator is selected for multiplications between rotation matrices and vectors of two
- * different data types \a T1 and \a T2, which are supported by the MathTrait class. The operator
- * returns a vector of the higher-order data type of the two involved data types.
- */
-//template< typename T1    // Data type of the left-hand side vector
-//        , typename T2 >  // Data type of the right-hand side rotation matrix
-//inline const Vector3< typename MathTrait<T1,T2>::MultType, true >
-//   operator*( const Vector3<T1,true>& lhs, const RotationMatrix<T2>& rhs )
-//{
-//   typedef typename MathTrait<T1,T2>::MultType  MT;
-//   return Vector3<MT,true>( lhs[0]*rhs[0] + lhs[1]*rhs[3] + lhs[2]*rhs[6],
-//                            lhs[0]*rhs[1] + lhs[1]*rhs[4] + lhs[2]*rhs[7],
-//                            lhs[0]*rhs[2] + lhs[1]*rhs[5] + lhs[2]*rhs[8] );
-//}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Multiplication operator for the multiplication of a rotation matrix and a standard
- *        matrix (\f$ A=R*B \f$).
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side rotation matrix for the multiplication.
- * \param rhs The right-hand side standard matrix for the multiplication.
- * \return The resulting matrix.
- *
- * This operator is selected for multiplications between matrices of two different data types
- * \a T1 and \a T2, which are supported by the MathTrait class. The operator returns a matrix
- * of the higher-order data type of the two involved matrix data types.
- */
-template< typename T1    // Data type of the left-hand side rotation matrix
-        , typename T2 >  // Data type of the right-hand side standard matrix
-inline const Matrix3< typename MathTrait<T1,T2>::MultType >
-   operator*( const RotationMatrix<T1>& lhs, const Matrix3<T2>& rhs )
-{
-   typedef typename MathTrait<T1,T2>::MultType  MT;
-   return Matrix3<MT>( lhs[0]*rhs[0] + lhs[1]*rhs[3] + lhs[2]*rhs[6],
-                         lhs[0]*rhs[1] + lhs[1]*rhs[4] + lhs[2]*rhs[7],
-                         lhs[0]*rhs[2] + lhs[1]*rhs[5] + lhs[2]*rhs[8],
-                         lhs[3]*rhs[0] + lhs[4]*rhs[3] + lhs[5]*rhs[6],
-                         lhs[3]*rhs[1] + lhs[4]*rhs[4] + lhs[5]*rhs[7],
-                         lhs[3]*rhs[2] + lhs[4]*rhs[5] + lhs[5]*rhs[8],
-                         lhs[6]*rhs[0] + lhs[7]*rhs[3] + lhs[8]*rhs[6],
-                         lhs[6]*rhs[1] + lhs[7]*rhs[4] + lhs[8]*rhs[7],
-                         lhs[6]*rhs[2] + lhs[7]*rhs[5] + lhs[8]*rhs[8] );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Multiplication operator for the multiplication of a standard matrix and a rotation
- *        matrix (\f$ A=B*R \f$).
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side standard matrix for the multiplication.
- * \param rhs The right-hand side rotation matrix for the multiplication.
- * \return The resulting matrix.
- *
- * This operator is selected for multiplications between matrices of two different data types
- * \a T1 and \a T2, which are supported by the MathTrait class. The operator returns a matrix
- * of the higher-order data type of the two involved matrix data types.
- */
-template< typename T1    // Data type of the left-hand side standard matrix
-        , typename T2 >  // Data type of the right-hand side rotation matrix
-inline const Matrix3< typename MathTrait<T1,T2>::MultType >
-   operator*( const Matrix3<T1>& lhs, const RotationMatrix<T2>& rhs )
-{
-   typedef typename MathTrait<T1,T2>::MultType  MT;
-   return Matrix3<MT>( lhs[0]*rhs[0] + lhs[1]*rhs[3] + lhs[2]*rhs[6],
-                         lhs[0]*rhs[1] + lhs[1]*rhs[4] + lhs[2]*rhs[7],
-                         lhs[0]*rhs[2] + lhs[1]*rhs[5] + lhs[2]*rhs[8],
-                         lhs[3]*rhs[0] + lhs[4]*rhs[3] + lhs[5]*rhs[6],
-                         lhs[3]*rhs[1] + lhs[4]*rhs[4] + lhs[5]*rhs[7],
-                         lhs[3]*rhs[2] + lhs[4]*rhs[5] + lhs[5]*rhs[8],
-                         lhs[6]*rhs[0] + lhs[7]*rhs[3] + lhs[8]*rhs[6],
-                         lhs[6]*rhs[1] + lhs[7]*rhs[4] + lhs[8]*rhs[7],
-                         lhs[6]*rhs[2] + lhs[7]*rhs[5] + lhs[8]*rhs[8] );
-}
-//*************************************************************************************************
-
-
-//*************************************************************************************************
-/*!\brief Multiplication operator for the multiplication of two rotation matrices (\f$ A=B*C \f$).
- * \ingroup dense_rotation_matrix
- *
- * \param lhs The left-hand side rotation matrix for the multiplication.
- * \param rhs The right-hand side rotation matrix for the multiplication.
- * \return The resulting rotation matrix.
- *
- * This operator is selected for multiplications between rotation matrices of two different
- * data types \a T1 and \a T2, which are supported by the MathTrait class. The operator
- * returns a matrix of the higher-order data type of the two involved matrix data types.
- */
-template< typename T1    // Data type of the left-hand side rotation matrix
-        , typename T2 >  // Data type of the right-hand side rotation matrix
-inline const RotationMatrix< typename MathTrait<T1,T2>::MultType >
-   operator*( const RotationMatrix<T1>& lhs, const RotationMatrix<T2>& rhs )
-{
-   typedef typename MathTrait<T1,T2>::MultType  MT;
-   return RotationMatrix<MT>( lhs.v_[0]*rhs.v_[0] + lhs.v_[1]*rhs.v_[3] + lhs.v_[2]*rhs.v_[6],
-                              lhs.v_[0]*rhs.v_[1] + lhs.v_[1]*rhs.v_[4] + lhs.v_[2]*rhs.v_[7],
-                              lhs.v_[0]*rhs.v_[2] + lhs.v_[1]*rhs.v_[5] + lhs.v_[2]*rhs.v_[8],
-                              lhs.v_[3]*rhs.v_[0] + lhs.v_[4]*rhs.v_[3] + lhs.v_[5]*rhs.v_[6],
-                              lhs.v_[3]*rhs.v_[1] + lhs.v_[4]*rhs.v_[4] + lhs.v_[5]*rhs.v_[7],
-                              lhs.v_[3]*rhs.v_[2] + lhs.v_[4]*rhs.v_[5] + lhs.v_[5]*rhs.v_[8],
-                              lhs.v_[6]*rhs.v_[0] + lhs.v_[7]*rhs.v_[3] + lhs.v_[8]*rhs.v_[6],
-                              lhs.v_[6]*rhs.v_[1] + lhs.v_[7]*rhs.v_[4] + lhs.v_[8]*rhs.v_[7],
-                              lhs.v_[6]*rhs.v_[2] + lhs.v_[7]*rhs.v_[5] + lhs.v_[8]*rhs.v_[8] );
-}
-//*************************************************************************************************
-
-
-
-
-//=================================================================================================
-//
-//  TYPE DEFINITIONS
-//
-//=================================================================================================
-
-//*************************************************************************************************
-/*!\brief Rotation matrix of real type.
- * \ingroup dense_rotation_matrix
- */
-typedef RotationMatrix<real_t>  Rot3;
-//*************************************************************************************************
-
-} // namespace math
-}
diff --git a/src/mesa_pd/data/LinkedCells.h b/src/mesa_pd/data/LinkedCells.h
index cdcec817229e0d1b88fc3470773e8ae424010d19..ead0d25d0f45971a342c8ffc60b71f891cb041bf 100644
--- a/src/mesa_pd/data/LinkedCells.h
+++ b/src/mesa_pd/data/LinkedCells.h
@@ -93,7 +93,10 @@ struct LinkedCells
 };
 
 inline
-math::AABB getCellAABB(const LinkedCells& ll, const int hash0, const int hash1, const int hash2)
+math::AABB getCellAABB(const LinkedCells& ll,
+                       const int64_t hash0,
+                       const int64_t hash1,
+                       const int64_t hash2)
 {
    WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0);
    WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
@@ -112,7 +115,10 @@ math::AABB getCellAABB(const LinkedCells& ll, const int hash0, const int hash1,
 }
 
 inline
-int getCellIdx(const LinkedCells& ll, const int hash0, const int hash1, const int hash2)
+uint_t getCellIdx(const LinkedCells& ll,
+                  const int64_t hash0,
+                  const int64_t hash1,
+                  const int64_t hash2)
 {
    WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0);
    WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
@@ -120,7 +126,26 @@ int getCellIdx(const LinkedCells& ll, const int hash0, const int hash1, const in
    WALBERLA_ASSERT_LESS(hash1, ll.numCellsPerDim_[1]);
    WALBERLA_ASSERT_GREATER_EQUAL(hash2, 0);
    WALBERLA_ASSERT_LESS(hash2, ll.numCellsPerDim_[2]);
-   return hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] + hash1 * ll.numCellsPerDim_[0] + hash0;
+   return uint_c(hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] + hash1 * ll.numCellsPerDim_[0] + hash0);
+}
+
+/// Inverse of getCellIdx: splits the linear cell index idx into the per-dimension coordinates hash0/1/2.
+inline void getCellCoordinates(const LinkedCells& ll, const uint64_t idx,
+                               int64_t& hash0, int64_t& hash1, int64_t& hash2)
+{
+   // Widen to 64 bit BEFORE multiplying: protects the product if numCellsPerDim_ holds a narrower type.
+   const int64_t cellsPerRow   = int64_c(ll.numCellsPerDim_[0]);
+   const int64_t cellsPerLayer = cellsPerRow * int64_c(ll.numCellsPerDim_[1]);
+   hash2 = int64_c(idx) / cellsPerLayer;
+   hash1 = (int64_c(idx) - hash2 * cellsPerLayer) / cellsPerRow;
+   hash0 = int64_c(idx) - hash2 * cellsPerLayer - hash1 * cellsPerRow;
+
+   WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0);
+   WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash1, 0);
+   WALBERLA_ASSERT_LESS(hash1, ll.numCellsPerDim_[1]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash2, 0);
+   WALBERLA_ASSERT_LESS(hash2, ll.numCellsPerDim_[2]);
 }
 
 inline
@@ -153,15 +178,17 @@ LinkedCells::LinkedCells(const math::AABB& domain, const Vec3& cellDiameter)
    WALBERLA_CHECK_LESS_EQUAL(cellDiameter_[2], cellDiameter[2]);
 
    WALBERLA_CHECK_GREATER_EQUAL(numCellsPerDim_[2], 0);
+
+   std::fill(cells_.begin(), cells_.end(), -1);
 }
 
 void LinkedCells::clear()
 {
    const uint64_t cellsSize = cells_.size();
    //clear existing linked cells
-   #ifdef _OPENMP
-   #pragma omp parallel for schedule(static)
-   #endif
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static)
+#endif
    for (int64_t i = 0; i < int64_c(cellsSize); ++i)
       cells_[uint64_c(i)] = -1;
    infiniteParticles_ = -1;
@@ -180,8 +207,8 @@ inline void LinkedCells::forEachParticlePair(const bool openmp, const Selector&
       {
          for (int x = 0; x < numCellsPerDim_[0]; ++x)
          {
-            const int cell_idx = getCellIdx(*this, x, y, z); ///< current cell index
-            int p_idx = cells_[uint_c(cell_idx)]; ///< current particle index
+            const uint_t cell_idx = getCellIdx(*this, x, y, z); ///< current cell index
+            int p_idx = cells_[cell_idx]; ///< current particle index
             int np_idx = -1; ///< particle to be checked against
 
             while (p_idx != -1)
@@ -218,11 +245,11 @@ inline void LinkedCells::forEachParticlePair(const bool openmp, const Selector&
                   if (ny >= numCellsPerDim_[1]) continue;
                   if (nz >= numCellsPerDim_[2]) continue;
 
-                  const int ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
+                  const uint_t ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
 
                   WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
                   WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
-                  np_idx = cells_[uint_c(ncell_idx)]; ///< neighbor particle index
+                  np_idx = cells_[ncell_idx]; ///< neighbor particle index
                   while (np_idx != -1)
                   {
                      WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
@@ -275,8 +302,8 @@ inline void LinkedCells::forEachParticlePairHalf(const bool openmp, const Select
       {
          for (int x = 0; x < numCellsPerDim_[0]; ++x)
          {
-            const int cell_idx = getCellIdx(*this, x, y, z); ///< current cell index
-            int p_idx = cells_[uint_c(cell_idx)]; ///< current particle index
+            const uint_t cell_idx = getCellIdx(*this, x, y, z); ///< current cell index
+            int p_idx = cells_[cell_idx]; ///< current particle index
             int np_idx = -1; ///< particle to be checked against
 
             while (p_idx != -1)
@@ -312,11 +339,11 @@ inline void LinkedCells::forEachParticlePairHalf(const bool openmp, const Select
                   if (ny >= numCellsPerDim_[1]) continue;
                   if (nz >= numCellsPerDim_[2]) continue;
 
-                  const int ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
+                  const uint_t ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
 
                   WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
                   WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
-                  np_idx = cells_[uint_c(ncell_idx)]; ///< neighbor particle index
+                  np_idx = cells_[ncell_idx]; ///< neighbor particle index
                   while (np_idx != -1)
                   {
                      WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
diff --git a/src/mesa_pd/data/ParticleAccessor.h b/src/mesa_pd/data/ParticleAccessor.h
index 1f85d7348ae73806abf3529c4b65e2c8c203fceb..e1a9b17c721491afee2135f955f205598683f311 100644
--- a/src/mesa_pd/data/ParticleAccessor.h
+++ b/src/mesa_pd/data/ParticleAccessor.h
@@ -48,101 +48,105 @@ class ParticleAccessor : public IAccessor
 public:
    ParticleAccessor(const std::shared_ptr<data::ParticleStorage>& ps) : ps_(ps) {}
    virtual ~ParticleAccessor() = default;
-   const walberla::id_t& getUid(const size_t p_idx) const {return ps_->getUid(p_idx);}
+   walberla::id_t const & getUid(const size_t p_idx) const {return ps_->getUid(p_idx);}
    walberla::id_t& getUidRef(const size_t p_idx) {return ps_->getUidRef(p_idx);}
-   void setUid(const size_t p_idx, const walberla::id_t& v) { ps_->setUid(p_idx, v);}
+   void setUid(const size_t p_idx, walberla::id_t const & v) { ps_->setUid(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getPosition(const size_t p_idx) const {return ps_->getPosition(p_idx);}
+   walberla::mesa_pd::Vec3 const & getPosition(const size_t p_idx) const {return ps_->getPosition(p_idx);}
    walberla::mesa_pd::Vec3& getPositionRef(const size_t p_idx) {return ps_->getPositionRef(p_idx);}
-   void setPosition(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setPosition(p_idx, v);}
+   void setPosition(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setPosition(p_idx, v);}
    
-   const walberla::real_t& getInteractionRadius(const size_t p_idx) const {return ps_->getInteractionRadius(p_idx);}
+   walberla::real_t const & getInteractionRadius(const size_t p_idx) const {return ps_->getInteractionRadius(p_idx);}
    walberla::real_t& getInteractionRadiusRef(const size_t p_idx) {return ps_->getInteractionRadiusRef(p_idx);}
-   void setInteractionRadius(const size_t p_idx, const walberla::real_t& v) { ps_->setInteractionRadius(p_idx, v);}
+   void setInteractionRadius(const size_t p_idx, walberla::real_t const & v) { ps_->setInteractionRadius(p_idx, v);}
    
-   const walberla::mesa_pd::data::particle_flags::FlagT& getFlags(const size_t p_idx) const {return ps_->getFlags(p_idx);}
+   walberla::mesa_pd::data::particle_flags::FlagT const & getFlags(const size_t p_idx) const {return ps_->getFlags(p_idx);}
    walberla::mesa_pd::data::particle_flags::FlagT& getFlagsRef(const size_t p_idx) {return ps_->getFlagsRef(p_idx);}
-   void setFlags(const size_t p_idx, const walberla::mesa_pd::data::particle_flags::FlagT& v) { ps_->setFlags(p_idx, v);}
+   void setFlags(const size_t p_idx, walberla::mesa_pd::data::particle_flags::FlagT const & v) { ps_->setFlags(p_idx, v);}
    
-   const int& getOwner(const size_t p_idx) const {return ps_->getOwner(p_idx);}
+   int const & getOwner(const size_t p_idx) const {return ps_->getOwner(p_idx);}
    int& getOwnerRef(const size_t p_idx) {return ps_->getOwnerRef(p_idx);}
-   void setOwner(const size_t p_idx, const int& v) { ps_->setOwner(p_idx, v);}
+   void setOwner(const size_t p_idx, int const & v) { ps_->setOwner(p_idx, v);}
    
-   const std::unordered_set<walberla::mpi::MPIRank>& getGhostOwners(const size_t p_idx) const {return ps_->getGhostOwners(p_idx);}
+   std::unordered_set<walberla::mpi::MPIRank> const & getGhostOwners(const size_t p_idx) const {return ps_->getGhostOwners(p_idx);}
    std::unordered_set<walberla::mpi::MPIRank>& getGhostOwnersRef(const size_t p_idx) {return ps_->getGhostOwnersRef(p_idx);}
-   void setGhostOwners(const size_t p_idx, const std::unordered_set<walberla::mpi::MPIRank>& v) { ps_->setGhostOwners(p_idx, v);}
+   void setGhostOwners(const size_t p_idx, std::unordered_set<walberla::mpi::MPIRank> const & v) { ps_->setGhostOwners(p_idx, v);}
    
-   const size_t& getShapeID(const size_t p_idx) const {return ps_->getShapeID(p_idx);}
+   size_t const & getShapeID(const size_t p_idx) const {return ps_->getShapeID(p_idx);}
    size_t& getShapeIDRef(const size_t p_idx) {return ps_->getShapeIDRef(p_idx);}
-   void setShapeID(const size_t p_idx, const size_t& v) { ps_->setShapeID(p_idx, v);}
+   void setShapeID(const size_t p_idx, size_t const & v) { ps_->setShapeID(p_idx, v);}
    
-   const walberla::mesa_pd::Rot3& getRotation(const size_t p_idx) const {return ps_->getRotation(p_idx);}
+   walberla::mesa_pd::Rot3 const & getRotation(const size_t p_idx) const {return ps_->getRotation(p_idx);}
    walberla::mesa_pd::Rot3& getRotationRef(const size_t p_idx) {return ps_->getRotationRef(p_idx);}
-   void setRotation(const size_t p_idx, const walberla::mesa_pd::Rot3& v) { ps_->setRotation(p_idx, v);}
+   void setRotation(const size_t p_idx, walberla::mesa_pd::Rot3 const & v) { ps_->setRotation(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getAngularVelocity(const size_t p_idx) const {return ps_->getAngularVelocity(p_idx);}
+   walberla::mesa_pd::Vec3 const & getAngularVelocity(const size_t p_idx) const {return ps_->getAngularVelocity(p_idx);}
    walberla::mesa_pd::Vec3& getAngularVelocityRef(const size_t p_idx) {return ps_->getAngularVelocityRef(p_idx);}
-   void setAngularVelocity(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setAngularVelocity(p_idx, v);}
+   void setAngularVelocity(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setAngularVelocity(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getTorque(const size_t p_idx) const {return ps_->getTorque(p_idx);}
+   walberla::mesa_pd::Vec3 const & getTorque(const size_t p_idx) const {return ps_->getTorque(p_idx);}
    walberla::mesa_pd::Vec3& getTorqueRef(const size_t p_idx) {return ps_->getTorqueRef(p_idx);}
-   void setTorque(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setTorque(p_idx, v);}
+   void setTorque(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setTorque(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getLinearVelocity(const size_t p_idx) const {return ps_->getLinearVelocity(p_idx);}
+   walberla::mesa_pd::Vec3 const & getLinearVelocity(const size_t p_idx) const {return ps_->getLinearVelocity(p_idx);}
    walberla::mesa_pd::Vec3& getLinearVelocityRef(const size_t p_idx) {return ps_->getLinearVelocityRef(p_idx);}
-   void setLinearVelocity(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setLinearVelocity(p_idx, v);}
+   void setLinearVelocity(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setLinearVelocity(p_idx, v);}
    
-   const walberla::real_t& getInvMass(const size_t p_idx) const {return ps_->getInvMass(p_idx);}
+   walberla::real_t const & getInvMass(const size_t p_idx) const {return ps_->getInvMass(p_idx);}
    walberla::real_t& getInvMassRef(const size_t p_idx) {return ps_->getInvMassRef(p_idx);}
-   void setInvMass(const size_t p_idx, const walberla::real_t& v) { ps_->setInvMass(p_idx, v);}
+   void setInvMass(const size_t p_idx, walberla::real_t const & v) { ps_->setInvMass(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getForce(const size_t p_idx) const {return ps_->getForce(p_idx);}
+   walberla::mesa_pd::Vec3 const & getForce(const size_t p_idx) const {return ps_->getForce(p_idx);}
    walberla::mesa_pd::Vec3& getForceRef(const size_t p_idx) {return ps_->getForceRef(p_idx);}
-   void setForce(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setForce(p_idx, v);}
+   void setForce(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setForce(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getOldForce(const size_t p_idx) const {return ps_->getOldForce(p_idx);}
+   walberla::mesa_pd::Vec3 const & getOldForce(const size_t p_idx) const {return ps_->getOldForce(p_idx);}
    walberla::mesa_pd::Vec3& getOldForceRef(const size_t p_idx) {return ps_->getOldForceRef(p_idx);}
-   void setOldForce(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setOldForce(p_idx, v);}
+   void setOldForce(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setOldForce(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getOldTorque(const size_t p_idx) const {return ps_->getOldTorque(p_idx);}
+   walberla::mesa_pd::Vec3 const & getOldTorque(const size_t p_idx) const {return ps_->getOldTorque(p_idx);}
    walberla::mesa_pd::Vec3& getOldTorqueRef(const size_t p_idx) {return ps_->getOldTorqueRef(p_idx);}
-   void setOldTorque(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setOldTorque(p_idx, v);}
+   void setOldTorque(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setOldTorque(p_idx, v);}
    
-   const uint_t& getType(const size_t p_idx) const {return ps_->getType(p_idx);}
+   blockforest::Block* const & getCurrentBlock(const size_t p_idx) const {return ps_->getCurrentBlock(p_idx);} // read-only: forwards to ps_->getCurrentBlock(p_idx)
+   blockforest::Block*& getCurrentBlockRef(const size_t p_idx) {return ps_->getCurrentBlockRef(p_idx);} // mutable: forwards to ps_->getCurrentBlockRef(p_idx)
+   void setCurrentBlock(const size_t p_idx, blockforest::Block* const & v) { ps_->setCurrentBlock(p_idx, v);} // forwards v to ps_->setCurrentBlock(p_idx, v)
+   
+   uint_t const & getType(const size_t p_idx) const {return ps_->getType(p_idx);}
    uint_t& getTypeRef(const size_t p_idx) {return ps_->getTypeRef(p_idx);}
-   void setType(const size_t p_idx, const uint_t& v) { ps_->setType(p_idx, v);}
+   void setType(const size_t p_idx, uint_t const & v) { ps_->setType(p_idx, v);}
    
-   const int& getNextParticle(const size_t p_idx) const {return ps_->getNextParticle(p_idx);}
+   int const & getNextParticle(const size_t p_idx) const {return ps_->getNextParticle(p_idx);}
    int& getNextParticleRef(const size_t p_idx) {return ps_->getNextParticleRef(p_idx);}
-   void setNextParticle(const size_t p_idx, const int& v) { ps_->setNextParticle(p_idx, v);}
+   void setNextParticle(const size_t p_idx, int const & v) { ps_->setNextParticle(p_idx, v);}
    
-   const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getOldContactHistory(const size_t p_idx) const {return ps_->getOldContactHistory(p_idx);}
+   std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & getOldContactHistory(const size_t p_idx) const {return ps_->getOldContactHistory(p_idx);}
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getOldContactHistoryRef(const size_t p_idx) {return ps_->getOldContactHistoryRef(p_idx);}
-   void setOldContactHistory(const size_t p_idx, const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& v) { ps_->setOldContactHistory(p_idx, v);}
+   void setOldContactHistory(const size_t p_idx, std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & v) { ps_->setOldContactHistory(p_idx, v);}
    
-   const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getNewContactHistory(const size_t p_idx) const {return ps_->getNewContactHistory(p_idx);}
+   std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & getNewContactHistory(const size_t p_idx) const {return ps_->getNewContactHistory(p_idx);}
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getNewContactHistoryRef(const size_t p_idx) {return ps_->getNewContactHistoryRef(p_idx);}
-   void setNewContactHistory(const size_t p_idx, const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& v) { ps_->setNewContactHistory(p_idx, v);}
+   void setNewContactHistory(const size_t p_idx, std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & v) { ps_->setNewContactHistory(p_idx, v);}
    
-   const walberla::real_t& getTemperature(const size_t p_idx) const {return ps_->getTemperature(p_idx);}
+   walberla::real_t const & getTemperature(const size_t p_idx) const {return ps_->getTemperature(p_idx);}
    walberla::real_t& getTemperatureRef(const size_t p_idx) {return ps_->getTemperatureRef(p_idx);}
-   void setTemperature(const size_t p_idx, const walberla::real_t& v) { ps_->setTemperature(p_idx, v);}
+   void setTemperature(const size_t p_idx, walberla::real_t const & v) { ps_->setTemperature(p_idx, v);}
    
-   const walberla::real_t& getHeatFlux(const size_t p_idx) const {return ps_->getHeatFlux(p_idx);}
+   walberla::real_t const & getHeatFlux(const size_t p_idx) const {return ps_->getHeatFlux(p_idx);}
    walberla::real_t& getHeatFluxRef(const size_t p_idx) {return ps_->getHeatFluxRef(p_idx);}
-   void setHeatFlux(const size_t p_idx, const walberla::real_t& v) { ps_->setHeatFlux(p_idx, v);}
+   void setHeatFlux(const size_t p_idx, walberla::real_t const & v) { ps_->setHeatFlux(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getDv(const size_t p_idx) const {return ps_->getDv(p_idx);}
+   walberla::mesa_pd::Vec3 const & getDv(const size_t p_idx) const {return ps_->getDv(p_idx);}
    walberla::mesa_pd::Vec3& getDvRef(const size_t p_idx) {return ps_->getDvRef(p_idx);}
-   void setDv(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setDv(p_idx, v);}
+   void setDv(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setDv(p_idx, v);}
    
-   const walberla::mesa_pd::Vec3& getDw(const size_t p_idx) const {return ps_->getDw(p_idx);}
+   walberla::mesa_pd::Vec3 const & getDw(const size_t p_idx) const {return ps_->getDw(p_idx);}
    walberla::mesa_pd::Vec3& getDwRef(const size_t p_idx) {return ps_->getDwRef(p_idx);}
-   void setDw(const size_t p_idx, const walberla::mesa_pd::Vec3& v) { ps_->setDw(p_idx, v);}
+   void setDw(const size_t p_idx, walberla::mesa_pd::Vec3 const & v) { ps_->setDw(p_idx, v);}
    
-   const std::unordered_set<walberla::mpi::MPIRank>& getNeighborState(const size_t p_idx) const {return ps_->getNeighborState(p_idx);}
+   std::unordered_set<walberla::mpi::MPIRank> const & getNeighborState(const size_t p_idx) const {return ps_->getNeighborState(p_idx);}
    std::unordered_set<walberla::mpi::MPIRank>& getNeighborStateRef(const size_t p_idx) {return ps_->getNeighborStateRef(p_idx);}
-   void setNeighborState(const size_t p_idx, const std::unordered_set<walberla::mpi::MPIRank>& v) { ps_->setNeighborState(p_idx, v);}
+   void setNeighborState(const size_t p_idx, std::unordered_set<walberla::mpi::MPIRank> const & v) { ps_->setNeighborState(p_idx, v);}
    
 
    id_t getInvalidUid() const {return UniqueID<data::Particle>::invalidID();}
@@ -189,100 +193,104 @@ class SingleParticleAccessor : public IAccessor
 {
 public:
    virtual ~SingleParticleAccessor() = default;
-   const walberla::id_t& getUid(const size_t /*p_idx*/) const {return uid_;}
-   void setUid(const size_t /*p_idx*/, const walberla::id_t& v) { uid_ = v;}
+   walberla::id_t const & getUid(const size_t /*p_idx*/) const {return uid_;}
+   void setUid(const size_t /*p_idx*/, walberla::id_t const & v) { uid_ = v;}
    walberla::id_t& getUidRef(const size_t /*p_idx*/) {return uid_;}
    
-   const walberla::mesa_pd::Vec3& getPosition(const size_t /*p_idx*/) const {return position_;}
-   void setPosition(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { position_ = v;}
+   walberla::mesa_pd::Vec3 const & getPosition(const size_t /*p_idx*/) const {return position_;}
+   void setPosition(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { position_ = v;}
    walberla::mesa_pd::Vec3& getPositionRef(const size_t /*p_idx*/) {return position_;}
    
-   const walberla::real_t& getInteractionRadius(const size_t /*p_idx*/) const {return interactionRadius_;}
-   void setInteractionRadius(const size_t /*p_idx*/, const walberla::real_t& v) { interactionRadius_ = v;}
+   walberla::real_t const & getInteractionRadius(const size_t /*p_idx*/) const {return interactionRadius_;}
+   void setInteractionRadius(const size_t /*p_idx*/, walberla::real_t const & v) { interactionRadius_ = v;}
    walberla::real_t& getInteractionRadiusRef(const size_t /*p_idx*/) {return interactionRadius_;}
    
-   const walberla::mesa_pd::data::particle_flags::FlagT& getFlags(const size_t /*p_idx*/) const {return flags_;}
-   void setFlags(const size_t /*p_idx*/, const walberla::mesa_pd::data::particle_flags::FlagT& v) { flags_ = v;}
+   walberla::mesa_pd::data::particle_flags::FlagT const & getFlags(const size_t /*p_idx*/) const {return flags_;}
+   void setFlags(const size_t /*p_idx*/, walberla::mesa_pd::data::particle_flags::FlagT const & v) { flags_ = v;}
    walberla::mesa_pd::data::particle_flags::FlagT& getFlagsRef(const size_t /*p_idx*/) {return flags_;}
    
-   const int& getOwner(const size_t /*p_idx*/) const {return owner_;}
-   void setOwner(const size_t /*p_idx*/, const int& v) { owner_ = v;}
+   int const & getOwner(const size_t /*p_idx*/) const {return owner_;}
+   void setOwner(const size_t /*p_idx*/, int const & v) { owner_ = v;}
    int& getOwnerRef(const size_t /*p_idx*/) {return owner_;}
    
-   const std::unordered_set<walberla::mpi::MPIRank>& getGhostOwners(const size_t /*p_idx*/) const {return ghostOwners_;}
-   void setGhostOwners(const size_t /*p_idx*/, const std::unordered_set<walberla::mpi::MPIRank>& v) { ghostOwners_ = v;}
+   std::unordered_set<walberla::mpi::MPIRank> const & getGhostOwners(const size_t /*p_idx*/) const {return ghostOwners_;}
+   void setGhostOwners(const size_t /*p_idx*/, std::unordered_set<walberla::mpi::MPIRank> const & v) { ghostOwners_ = v;}
    std::unordered_set<walberla::mpi::MPIRank>& getGhostOwnersRef(const size_t /*p_idx*/) {return ghostOwners_;}
    
-   const size_t& getShapeID(const size_t /*p_idx*/) const {return shapeID_;}
-   void setShapeID(const size_t /*p_idx*/, const size_t& v) { shapeID_ = v;}
+   size_t const & getShapeID(const size_t /*p_idx*/) const {return shapeID_;}
+   void setShapeID(const size_t /*p_idx*/, size_t const & v) { shapeID_ = v;}
    size_t& getShapeIDRef(const size_t /*p_idx*/) {return shapeID_;}
    
-   const walberla::mesa_pd::Rot3& getRotation(const size_t /*p_idx*/) const {return rotation_;}
-   void setRotation(const size_t /*p_idx*/, const walberla::mesa_pd::Rot3& v) { rotation_ = v;}
+   walberla::mesa_pd::Rot3 const & getRotation(const size_t /*p_idx*/) const {return rotation_;}
+   void setRotation(const size_t /*p_idx*/, walberla::mesa_pd::Rot3 const & v) { rotation_ = v;}
    walberla::mesa_pd::Rot3& getRotationRef(const size_t /*p_idx*/) {return rotation_;}
    
-   const walberla::mesa_pd::Vec3& getAngularVelocity(const size_t /*p_idx*/) const {return angularVelocity_;}
-   void setAngularVelocity(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { angularVelocity_ = v;}
+   walberla::mesa_pd::Vec3 const & getAngularVelocity(const size_t /*p_idx*/) const {return angularVelocity_;}
+   void setAngularVelocity(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { angularVelocity_ = v;}
    walberla::mesa_pd::Vec3& getAngularVelocityRef(const size_t /*p_idx*/) {return angularVelocity_;}
    
-   const walberla::mesa_pd::Vec3& getTorque(const size_t /*p_idx*/) const {return torque_;}
-   void setTorque(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { torque_ = v;}
+   walberla::mesa_pd::Vec3 const & getTorque(const size_t /*p_idx*/) const {return torque_;}
+   void setTorque(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { torque_ = v;}
    walberla::mesa_pd::Vec3& getTorqueRef(const size_t /*p_idx*/) {return torque_;}
    
-   const walberla::mesa_pd::Vec3& getLinearVelocity(const size_t /*p_idx*/) const {return linearVelocity_;}
-   void setLinearVelocity(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { linearVelocity_ = v;}
+   walberla::mesa_pd::Vec3 const & getLinearVelocity(const size_t /*p_idx*/) const {return linearVelocity_;}
+   void setLinearVelocity(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { linearVelocity_ = v;}
    walberla::mesa_pd::Vec3& getLinearVelocityRef(const size_t /*p_idx*/) {return linearVelocity_;}
    
-   const walberla::real_t& getInvMass(const size_t /*p_idx*/) const {return invMass_;}
-   void setInvMass(const size_t /*p_idx*/, const walberla::real_t& v) { invMass_ = v;}
+   walberla::real_t const & getInvMass(const size_t /*p_idx*/) const {return invMass_;}
+   void setInvMass(const size_t /*p_idx*/, walberla::real_t const & v) { invMass_ = v;}
    walberla::real_t& getInvMassRef(const size_t /*p_idx*/) {return invMass_;}
    
-   const walberla::mesa_pd::Vec3& getForce(const size_t /*p_idx*/) const {return force_;}
-   void setForce(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { force_ = v;}
+   walberla::mesa_pd::Vec3 const & getForce(const size_t /*p_idx*/) const {return force_;}
+   void setForce(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { force_ = v;}
    walberla::mesa_pd::Vec3& getForceRef(const size_t /*p_idx*/) {return force_;}
    
-   const walberla::mesa_pd::Vec3& getOldForce(const size_t /*p_idx*/) const {return oldForce_;}
-   void setOldForce(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { oldForce_ = v;}
+   walberla::mesa_pd::Vec3 const & getOldForce(const size_t /*p_idx*/) const {return oldForce_;}
+   void setOldForce(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { oldForce_ = v;}
    walberla::mesa_pd::Vec3& getOldForceRef(const size_t /*p_idx*/) {return oldForce_;}
    
-   const walberla::mesa_pd::Vec3& getOldTorque(const size_t /*p_idx*/) const {return oldTorque_;}
-   void setOldTorque(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { oldTorque_ = v;}
+   walberla::mesa_pd::Vec3 const & getOldTorque(const size_t /*p_idx*/) const {return oldTorque_;}
+   void setOldTorque(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { oldTorque_ = v;}
    walberla::mesa_pd::Vec3& getOldTorqueRef(const size_t /*p_idx*/) {return oldTorque_;}
    
-   const uint_t& getType(const size_t /*p_idx*/) const {return type_;}
-   void setType(const size_t /*p_idx*/, const uint_t& v) { type_ = v;}
+   blockforest::Block* const & getCurrentBlock(const size_t /*p_idx*/) const {return currentBlock_;} // index is ignored; returns the member currentBlock_
+   void setCurrentBlock(const size_t /*p_idx*/, blockforest::Block* const & v) { currentBlock_ = v;} // index is ignored; overwrites currentBlock_ with v
+   blockforest::Block*& getCurrentBlockRef(const size_t /*p_idx*/) {return currentBlock_;} // index is ignored; mutable reference to currentBlock_
+   
+   uint_t const & getType(const size_t /*p_idx*/) const {return type_;}
+   void setType(const size_t /*p_idx*/, uint_t const & v) { type_ = v;}
    uint_t& getTypeRef(const size_t /*p_idx*/) {return type_;}
    
-   const int& getNextParticle(const size_t /*p_idx*/) const {return nextParticle_;}
-   void setNextParticle(const size_t /*p_idx*/, const int& v) { nextParticle_ = v;}
+   int const & getNextParticle(const size_t /*p_idx*/) const {return nextParticle_;}
+   void setNextParticle(const size_t /*p_idx*/, int const & v) { nextParticle_ = v;}
    int& getNextParticleRef(const size_t /*p_idx*/) {return nextParticle_;}
    
-   const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getOldContactHistory(const size_t /*p_idx*/) const {return oldContactHistory_;}
-   void setOldContactHistory(const size_t /*p_idx*/, const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& v) { oldContactHistory_ = v;}
+   std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & getOldContactHistory(const size_t /*p_idx*/) const {return oldContactHistory_;}
+   void setOldContactHistory(const size_t /*p_idx*/, std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & v) { oldContactHistory_ = v;}
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getOldContactHistoryRef(const size_t /*p_idx*/) {return oldContactHistory_;}
    
-   const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getNewContactHistory(const size_t /*p_idx*/) const {return newContactHistory_;}
-   void setNewContactHistory(const size_t /*p_idx*/, const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& v) { newContactHistory_ = v;}
+   std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & getNewContactHistory(const size_t /*p_idx*/) const {return newContactHistory_;}
+   void setNewContactHistory(const size_t /*p_idx*/, std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & v) { newContactHistory_ = v;}
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& getNewContactHistoryRef(const size_t /*p_idx*/) {return newContactHistory_;}
    
-   const walberla::real_t& getTemperature(const size_t /*p_idx*/) const {return temperature_;}
-   void setTemperature(const size_t /*p_idx*/, const walberla::real_t& v) { temperature_ = v;}
+   walberla::real_t const & getTemperature(const size_t /*p_idx*/) const {return temperature_;}
+   void setTemperature(const size_t /*p_idx*/, walberla::real_t const & v) { temperature_ = v;}
    walberla::real_t& getTemperatureRef(const size_t /*p_idx*/) {return temperature_;}
    
-   const walberla::real_t& getHeatFlux(const size_t /*p_idx*/) const {return heatFlux_;}
-   void setHeatFlux(const size_t /*p_idx*/, const walberla::real_t& v) { heatFlux_ = v;}
+   walberla::real_t const & getHeatFlux(const size_t /*p_idx*/) const {return heatFlux_;}
+   void setHeatFlux(const size_t /*p_idx*/, walberla::real_t const & v) { heatFlux_ = v;}
    walberla::real_t& getHeatFluxRef(const size_t /*p_idx*/) {return heatFlux_;}
    
-   const walberla::mesa_pd::Vec3& getDv(const size_t /*p_idx*/) const {return dv_;}
-   void setDv(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { dv_ = v;}
+   walberla::mesa_pd::Vec3 const & getDv(const size_t /*p_idx*/) const {return dv_;}
+   void setDv(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { dv_ = v;}
    walberla::mesa_pd::Vec3& getDvRef(const size_t /*p_idx*/) {return dv_;}
    
-   const walberla::mesa_pd::Vec3& getDw(const size_t /*p_idx*/) const {return dw_;}
-   void setDw(const size_t /*p_idx*/, const walberla::mesa_pd::Vec3& v) { dw_ = v;}
+   walberla::mesa_pd::Vec3 const & getDw(const size_t /*p_idx*/) const {return dw_;}
+   void setDw(const size_t /*p_idx*/, walberla::mesa_pd::Vec3 const & v) { dw_ = v;}
    walberla::mesa_pd::Vec3& getDwRef(const size_t /*p_idx*/) {return dw_;}
    
-   const std::unordered_set<walberla::mpi::MPIRank>& getNeighborState(const size_t /*p_idx*/) const {return neighborState_;}
-   void setNeighborState(const size_t /*p_idx*/, const std::unordered_set<walberla::mpi::MPIRank>& v) { neighborState_ = v;}
+   std::unordered_set<walberla::mpi::MPIRank> const & getNeighborState(const size_t /*p_idx*/) const {return neighborState_;}
+   void setNeighborState(const size_t /*p_idx*/, std::unordered_set<walberla::mpi::MPIRank> const & v) { neighborState_ = v;}
    std::unordered_set<walberla::mpi::MPIRank>& getNeighborStateRef(const size_t /*p_idx*/) {return neighborState_;}
    
 
@@ -311,6 +319,7 @@ private:
    walberla::mesa_pd::Vec3 force_;
    walberla::mesa_pd::Vec3 oldForce_;
    walberla::mesa_pd::Vec3 oldTorque_;
+   blockforest::Block* currentBlock_ = nullptr; // null until setCurrentBlock()/getCurrentBlockRef() assigns it; the defaulted destructor never deletes it
    uint_t type_;
    int nextParticle_;
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> oldContactHistory_;
diff --git a/src/mesa_pd/data/ParticleStorage.h b/src/mesa_pd/data/ParticleStorage.h
index 1d4c4ba08a13352e975afd17d033f110dd6feebf..0837b343210818b13a9ec1509d707ee555988447 100644
--- a/src/mesa_pd/data/ParticleStorage.h
+++ b/src/mesa_pd/data/ParticleStorage.h
@@ -38,6 +38,7 @@
 #include <mesa_pd/data/DataTypes.h>
 #include <mesa_pd/data/IAccessor.h>
 #include <mesa_pd/data/Flags.h>
+#include <blockforest/BlockForest.h>
 #include <mesa_pd/data/STLOverloads.h>
 
 #include <core/Abort.h>
@@ -85,6 +86,7 @@ public:
       using force_type = walberla::mesa_pd::Vec3;
       using oldForce_type = walberla::mesa_pd::Vec3;
       using oldTorque_type = walberla::mesa_pd::Vec3;
+      using currentBlock_type = blockforest::Block*;
       using type_type = uint_t;
       using nextParticle_type = int;
       using oldContactHistory_type = std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>;
@@ -96,101 +98,105 @@ public:
       using neighborState_type = std::unordered_set<walberla::mpi::MPIRank>;
 
       
-      const uid_type& getUid() const {return storage_.getUid(i_);}
+      uid_type const & getUid() const {return storage_.getUid(i_);}
       uid_type& getUidRef() {return storage_.getUidRef(i_);}
-      void setUid(const uid_type& v) { storage_.setUid(i_, v);}
+      void setUid(uid_type const & v) { storage_.setUid(i_, v);}
       
-      const position_type& getPosition() const {return storage_.getPosition(i_);}
+      position_type const & getPosition() const {return storage_.getPosition(i_);}
       position_type& getPositionRef() {return storage_.getPositionRef(i_);}
-      void setPosition(const position_type& v) { storage_.setPosition(i_, v);}
+      void setPosition(position_type const & v) { storage_.setPosition(i_, v);}
       
-      const interactionRadius_type& getInteractionRadius() const {return storage_.getInteractionRadius(i_);}
+      interactionRadius_type const & getInteractionRadius() const {return storage_.getInteractionRadius(i_);}
       interactionRadius_type& getInteractionRadiusRef() {return storage_.getInteractionRadiusRef(i_);}
-      void setInteractionRadius(const interactionRadius_type& v) { storage_.setInteractionRadius(i_, v);}
+      void setInteractionRadius(interactionRadius_type const & v) { storage_.setInteractionRadius(i_, v);}
       
-      const flags_type& getFlags() const {return storage_.getFlags(i_);}
+      flags_type const & getFlags() const {return storage_.getFlags(i_);}
       flags_type& getFlagsRef() {return storage_.getFlagsRef(i_);}
-      void setFlags(const flags_type& v) { storage_.setFlags(i_, v);}
+      void setFlags(flags_type const & v) { storage_.setFlags(i_, v);}
       
-      const owner_type& getOwner() const {return storage_.getOwner(i_);}
+      owner_type const & getOwner() const {return storage_.getOwner(i_);}
       owner_type& getOwnerRef() {return storage_.getOwnerRef(i_);}
-      void setOwner(const owner_type& v) { storage_.setOwner(i_, v);}
+      void setOwner(owner_type const & v) { storage_.setOwner(i_, v);}
       
-      const ghostOwners_type& getGhostOwners() const {return storage_.getGhostOwners(i_);}
+      ghostOwners_type const & getGhostOwners() const {return storage_.getGhostOwners(i_);}
       ghostOwners_type& getGhostOwnersRef() {return storage_.getGhostOwnersRef(i_);}
-      void setGhostOwners(const ghostOwners_type& v) { storage_.setGhostOwners(i_, v);}
+      void setGhostOwners(ghostOwners_type const & v) { storage_.setGhostOwners(i_, v);}
       
-      const shapeID_type& getShapeID() const {return storage_.getShapeID(i_);}
+      shapeID_type const & getShapeID() const {return storage_.getShapeID(i_);}
       shapeID_type& getShapeIDRef() {return storage_.getShapeIDRef(i_);}
-      void setShapeID(const shapeID_type& v) { storage_.setShapeID(i_, v);}
+      void setShapeID(shapeID_type const & v) { storage_.setShapeID(i_, v);}
       
-      const rotation_type& getRotation() const {return storage_.getRotation(i_);}
+      rotation_type const & getRotation() const {return storage_.getRotation(i_);}
       rotation_type& getRotationRef() {return storage_.getRotationRef(i_);}
-      void setRotation(const rotation_type& v) { storage_.setRotation(i_, v);}
+      void setRotation(rotation_type const & v) { storage_.setRotation(i_, v);}
       
-      const angularVelocity_type& getAngularVelocity() const {return storage_.getAngularVelocity(i_);}
+      angularVelocity_type const & getAngularVelocity() const {return storage_.getAngularVelocity(i_);}
       angularVelocity_type& getAngularVelocityRef() {return storage_.getAngularVelocityRef(i_);}
-      void setAngularVelocity(const angularVelocity_type& v) { storage_.setAngularVelocity(i_, v);}
+      void setAngularVelocity(angularVelocity_type const & v) { storage_.setAngularVelocity(i_, v);}
       
-      const torque_type& getTorque() const {return storage_.getTorque(i_);}
+      torque_type const & getTorque() const {return storage_.getTorque(i_);}
       torque_type& getTorqueRef() {return storage_.getTorqueRef(i_);}
-      void setTorque(const torque_type& v) { storage_.setTorque(i_, v);}
+      void setTorque(torque_type const & v) { storage_.setTorque(i_, v);}
       
-      const linearVelocity_type& getLinearVelocity() const {return storage_.getLinearVelocity(i_);}
+      linearVelocity_type const & getLinearVelocity() const {return storage_.getLinearVelocity(i_);}
       linearVelocity_type& getLinearVelocityRef() {return storage_.getLinearVelocityRef(i_);}
-      void setLinearVelocity(const linearVelocity_type& v) { storage_.setLinearVelocity(i_, v);}
+      void setLinearVelocity(linearVelocity_type const & v) { storage_.setLinearVelocity(i_, v);}
       
-      const invMass_type& getInvMass() const {return storage_.getInvMass(i_);}
+      invMass_type const & getInvMass() const {return storage_.getInvMass(i_);}
       invMass_type& getInvMassRef() {return storage_.getInvMassRef(i_);}
-      void setInvMass(const invMass_type& v) { storage_.setInvMass(i_, v);}
+      void setInvMass(invMass_type const & v) { storage_.setInvMass(i_, v);}
       
-      const force_type& getForce() const {return storage_.getForce(i_);}
+      force_type const & getForce() const {return storage_.getForce(i_);}
       force_type& getForceRef() {return storage_.getForceRef(i_);}
-      void setForce(const force_type& v) { storage_.setForce(i_, v);}
+      void setForce(force_type const & v) { storage_.setForce(i_, v);}
       
-      const oldForce_type& getOldForce() const {return storage_.getOldForce(i_);}
+      oldForce_type const & getOldForce() const {return storage_.getOldForce(i_);}
       oldForce_type& getOldForceRef() {return storage_.getOldForceRef(i_);}
-      void setOldForce(const oldForce_type& v) { storage_.setOldForce(i_, v);}
+      void setOldForce(oldForce_type const & v) { storage_.setOldForce(i_, v);}
       
-      const oldTorque_type& getOldTorque() const {return storage_.getOldTorque(i_);}
+      oldTorque_type const & getOldTorque() const {return storage_.getOldTorque(i_);}
       oldTorque_type& getOldTorqueRef() {return storage_.getOldTorqueRef(i_);}
-      void setOldTorque(const oldTorque_type& v) { storage_.setOldTorque(i_, v);}
+      void setOldTorque(oldTorque_type const & v) { storage_.setOldTorque(i_, v);}
       
-      const type_type& getType() const {return storage_.getType(i_);}
+      currentBlock_type const & getCurrentBlock() const {return storage_.getCurrentBlock(i_);} // read-only: forwards to storage_.getCurrentBlock(i_)
+      currentBlock_type& getCurrentBlockRef() {return storage_.getCurrentBlockRef(i_);} // mutable: forwards to storage_.getCurrentBlockRef(i_)
+      void setCurrentBlock(currentBlock_type const & v) { storage_.setCurrentBlock(i_, v);} // forwards v to storage_.setCurrentBlock(i_, v)
+      
+      type_type const & getType() const {return storage_.getType(i_);}
       type_type& getTypeRef() {return storage_.getTypeRef(i_);}
-      void setType(const type_type& v) { storage_.setType(i_, v);}
+      void setType(type_type const & v) { storage_.setType(i_, v);}
       
-      const nextParticle_type& getNextParticle() const {return storage_.getNextParticle(i_);}
+      nextParticle_type const & getNextParticle() const {return storage_.getNextParticle(i_);}
       nextParticle_type& getNextParticleRef() {return storage_.getNextParticleRef(i_);}
-      void setNextParticle(const nextParticle_type& v) { storage_.setNextParticle(i_, v);}
+      void setNextParticle(nextParticle_type const & v) { storage_.setNextParticle(i_, v);}
       
-      const oldContactHistory_type& getOldContactHistory() const {return storage_.getOldContactHistory(i_);}
+      oldContactHistory_type const & getOldContactHistory() const {return storage_.getOldContactHistory(i_);}
       oldContactHistory_type& getOldContactHistoryRef() {return storage_.getOldContactHistoryRef(i_);}
-      void setOldContactHistory(const oldContactHistory_type& v) { storage_.setOldContactHistory(i_, v);}
+      void setOldContactHistory(oldContactHistory_type const & v) { storage_.setOldContactHistory(i_, v);}
       
-      const newContactHistory_type& getNewContactHistory() const {return storage_.getNewContactHistory(i_);}
+      newContactHistory_type const & getNewContactHistory() const {return storage_.getNewContactHistory(i_);}
       newContactHistory_type& getNewContactHistoryRef() {return storage_.getNewContactHistoryRef(i_);}
-      void setNewContactHistory(const newContactHistory_type& v) { storage_.setNewContactHistory(i_, v);}
+      void setNewContactHistory(newContactHistory_type const & v) { storage_.setNewContactHistory(i_, v);}
       
-      const temperature_type& getTemperature() const {return storage_.getTemperature(i_);}
+      temperature_type const & getTemperature() const {return storage_.getTemperature(i_);}
       temperature_type& getTemperatureRef() {return storage_.getTemperatureRef(i_);}
-      void setTemperature(const temperature_type& v) { storage_.setTemperature(i_, v);}
+      void setTemperature(temperature_type const & v) { storage_.setTemperature(i_, v);}
       
-      const heatFlux_type& getHeatFlux() const {return storage_.getHeatFlux(i_);}
+      heatFlux_type const & getHeatFlux() const {return storage_.getHeatFlux(i_);}
       heatFlux_type& getHeatFluxRef() {return storage_.getHeatFluxRef(i_);}
-      void setHeatFlux(const heatFlux_type& v) { storage_.setHeatFlux(i_, v);}
+      void setHeatFlux(heatFlux_type const & v) { storage_.setHeatFlux(i_, v);}
       
-      const dv_type& getDv() const {return storage_.getDv(i_);}
+      dv_type const & getDv() const {return storage_.getDv(i_);}
       dv_type& getDvRef() {return storage_.getDvRef(i_);}
-      void setDv(const dv_type& v) { storage_.setDv(i_, v);}
+      void setDv(dv_type const & v) { storage_.setDv(i_, v);}
       
-      const dw_type& getDw() const {return storage_.getDw(i_);}
+      dw_type const & getDw() const {return storage_.getDw(i_);}
       dw_type& getDwRef() {return storage_.getDwRef(i_);}
-      void setDw(const dw_type& v) { storage_.setDw(i_, v);}
+      void setDw(dw_type const & v) { storage_.setDw(i_, v);}
       
-      const neighborState_type& getNeighborState() const {return storage_.getNeighborState(i_);}
+      neighborState_type const & getNeighborState() const {return storage_.getNeighborState(i_);}
       neighborState_type& getNeighborStateRef() {return storage_.getNeighborStateRef(i_);}
-      void setNeighborState(const neighborState_type& v) { storage_.setNeighborState(i_, v);}
+      void setNeighborState(neighborState_type const & v) { storage_.setNeighborState(i_, v);}
       
 
       size_t getIdx() const {return i_;}
@@ -267,6 +273,7 @@ public:
    using force_type = walberla::mesa_pd::Vec3;
    using oldForce_type = walberla::mesa_pd::Vec3;
    using oldTorque_type = walberla::mesa_pd::Vec3;
+   using currentBlock_type = blockforest::Block*;
    using type_type = uint_t;
    using nextParticle_type = int;
    using oldContactHistory_type = std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>;
@@ -278,101 +285,105 @@ public:
    using neighborState_type = std::unordered_set<walberla::mpi::MPIRank>;
 
    
-   const uid_type& getUid(const size_t idx) const {return uid_[idx];}
+   uid_type const & getUid(const size_t idx) const {return uid_[idx];}
    uid_type& getUidRef(const size_t idx) {return uid_[idx];}
-   void setUid(const size_t idx, const uid_type& v) { uid_[idx] = v; }
+   void setUid(const size_t idx, uid_type const & v) { uid_[idx] = v; }
    
-   const position_type& getPosition(const size_t idx) const {return position_[idx];}
+   position_type const & getPosition(const size_t idx) const {return position_[idx];}
    position_type& getPositionRef(const size_t idx) {return position_[idx];}
-   void setPosition(const size_t idx, const position_type& v) { position_[idx] = v; }
+   void setPosition(const size_t idx, position_type const & v) { position_[idx] = v; }
    
-   const interactionRadius_type& getInteractionRadius(const size_t idx) const {return interactionRadius_[idx];}
+   interactionRadius_type const & getInteractionRadius(const size_t idx) const {return interactionRadius_[idx];}
    interactionRadius_type& getInteractionRadiusRef(const size_t idx) {return interactionRadius_[idx];}
-   void setInteractionRadius(const size_t idx, const interactionRadius_type& v) { interactionRadius_[idx] = v; }
+   void setInteractionRadius(const size_t idx, interactionRadius_type const & v) { interactionRadius_[idx] = v; }
    
-   const flags_type& getFlags(const size_t idx) const {return flags_[idx];}
+   flags_type const & getFlags(const size_t idx) const {return flags_[idx];}
    flags_type& getFlagsRef(const size_t idx) {return flags_[idx];}
-   void setFlags(const size_t idx, const flags_type& v) { flags_[idx] = v; }
+   void setFlags(const size_t idx, flags_type const & v) { flags_[idx] = v; }
    
-   const owner_type& getOwner(const size_t idx) const {return owner_[idx];}
+   owner_type const & getOwner(const size_t idx) const {return owner_[idx];}
    owner_type& getOwnerRef(const size_t idx) {return owner_[idx];}
-   void setOwner(const size_t idx, const owner_type& v) { owner_[idx] = v; }
+   void setOwner(const size_t idx, owner_type const & v) { owner_[idx] = v; }
    
-   const ghostOwners_type& getGhostOwners(const size_t idx) const {return ghostOwners_[idx];}
+   ghostOwners_type const & getGhostOwners(const size_t idx) const {return ghostOwners_[idx];}
    ghostOwners_type& getGhostOwnersRef(const size_t idx) {return ghostOwners_[idx];}
-   void setGhostOwners(const size_t idx, const ghostOwners_type& v) { ghostOwners_[idx] = v; }
+   void setGhostOwners(const size_t idx, ghostOwners_type const & v) { ghostOwners_[idx] = v; }
    
-   const shapeID_type& getShapeID(const size_t idx) const {return shapeID_[idx];}
+   shapeID_type const & getShapeID(const size_t idx) const {return shapeID_[idx];}
    shapeID_type& getShapeIDRef(const size_t idx) {return shapeID_[idx];}
-   void setShapeID(const size_t idx, const shapeID_type& v) { shapeID_[idx] = v; }
+   void setShapeID(const size_t idx, shapeID_type const & v) { shapeID_[idx] = v; }
    
-   const rotation_type& getRotation(const size_t idx) const {return rotation_[idx];}
+   rotation_type const & getRotation(const size_t idx) const {return rotation_[idx];}
    rotation_type& getRotationRef(const size_t idx) {return rotation_[idx];}
-   void setRotation(const size_t idx, const rotation_type& v) { rotation_[idx] = v; }
+   void setRotation(const size_t idx, rotation_type const & v) { rotation_[idx] = v; }
    
-   const angularVelocity_type& getAngularVelocity(const size_t idx) const {return angularVelocity_[idx];}
+   angularVelocity_type const & getAngularVelocity(const size_t idx) const {return angularVelocity_[idx];}
    angularVelocity_type& getAngularVelocityRef(const size_t idx) {return angularVelocity_[idx];}
-   void setAngularVelocity(const size_t idx, const angularVelocity_type& v) { angularVelocity_[idx] = v; }
+   void setAngularVelocity(const size_t idx, angularVelocity_type const & v) { angularVelocity_[idx] = v; }
    
-   const torque_type& getTorque(const size_t idx) const {return torque_[idx];}
+   torque_type const & getTorque(const size_t idx) const {return torque_[idx];}
    torque_type& getTorqueRef(const size_t idx) {return torque_[idx];}
-   void setTorque(const size_t idx, const torque_type& v) { torque_[idx] = v; }
+   void setTorque(const size_t idx, torque_type const & v) { torque_[idx] = v; }
    
-   const linearVelocity_type& getLinearVelocity(const size_t idx) const {return linearVelocity_[idx];}
+   linearVelocity_type const & getLinearVelocity(const size_t idx) const {return linearVelocity_[idx];}
    linearVelocity_type& getLinearVelocityRef(const size_t idx) {return linearVelocity_[idx];}
-   void setLinearVelocity(const size_t idx, const linearVelocity_type& v) { linearVelocity_[idx] = v; }
+   void setLinearVelocity(const size_t idx, linearVelocity_type const & v) { linearVelocity_[idx] = v; }
    
-   const invMass_type& getInvMass(const size_t idx) const {return invMass_[idx];}
+   invMass_type const & getInvMass(const size_t idx) const {return invMass_[idx];}
    invMass_type& getInvMassRef(const size_t idx) {return invMass_[idx];}
-   void setInvMass(const size_t idx, const invMass_type& v) { invMass_[idx] = v; }
+   void setInvMass(const size_t idx, invMass_type const & v) { invMass_[idx] = v; }
    
-   const force_type& getForce(const size_t idx) const {return force_[idx];}
+   force_type const & getForce(const size_t idx) const {return force_[idx];}
    force_type& getForceRef(const size_t idx) {return force_[idx];}
-   void setForce(const size_t idx, const force_type& v) { force_[idx] = v; }
+   void setForce(const size_t idx, force_type const & v) { force_[idx] = v; }
    
-   const oldForce_type& getOldForce(const size_t idx) const {return oldForce_[idx];}
+   oldForce_type const & getOldForce(const size_t idx) const {return oldForce_[idx];}
    oldForce_type& getOldForceRef(const size_t idx) {return oldForce_[idx];}
-   void setOldForce(const size_t idx, const oldForce_type& v) { oldForce_[idx] = v; }
+   void setOldForce(const size_t idx, oldForce_type const & v) { oldForce_[idx] = v; }
    
-   const oldTorque_type& getOldTorque(const size_t idx) const {return oldTorque_[idx];}
+   oldTorque_type const & getOldTorque(const size_t idx) const {return oldTorque_[idx];}
    oldTorque_type& getOldTorqueRef(const size_t idx) {return oldTorque_[idx];}
-   void setOldTorque(const size_t idx, const oldTorque_type& v) { oldTorque_[idx] = v; }
+   void setOldTorque(const size_t idx, oldTorque_type const & v) { oldTorque_[idx] = v; }
+   
+   currentBlock_type const & getCurrentBlock(const size_t idx) const {return currentBlock_[idx];} ///< Read-only access to the currentBlock entry stored at index \p idx.
+   currentBlock_type& getCurrentBlockRef(const size_t idx) {return currentBlock_[idx];} ///< Mutable reference to the currentBlock entry stored at index \p idx.
+   void setCurrentBlock(const size_t idx, currentBlock_type const & v) { currentBlock_[idx] = v; } ///< Overwrites the currentBlock entry at index \p idx with \p v.
    
-   const type_type& getType(const size_t idx) const {return type_[idx];}
+   type_type const & getType(const size_t idx) const {return type_[idx];}
    type_type& getTypeRef(const size_t idx) {return type_[idx];}
-   void setType(const size_t idx, const type_type& v) { type_[idx] = v; }
+   void setType(const size_t idx, type_type const & v) { type_[idx] = v; }
    
-   const nextParticle_type& getNextParticle(const size_t idx) const {return nextParticle_[idx];}
+   nextParticle_type const & getNextParticle(const size_t idx) const {return nextParticle_[idx];}
    nextParticle_type& getNextParticleRef(const size_t idx) {return nextParticle_[idx];}
-   void setNextParticle(const size_t idx, const nextParticle_type& v) { nextParticle_[idx] = v; }
+   void setNextParticle(const size_t idx, nextParticle_type const & v) { nextParticle_[idx] = v; }
    
-   const oldContactHistory_type& getOldContactHistory(const size_t idx) const {return oldContactHistory_[idx];}
+   oldContactHistory_type const & getOldContactHistory(const size_t idx) const {return oldContactHistory_[idx];}
    oldContactHistory_type& getOldContactHistoryRef(const size_t idx) {return oldContactHistory_[idx];}
-   void setOldContactHistory(const size_t idx, const oldContactHistory_type& v) { oldContactHistory_[idx] = v; }
+   void setOldContactHistory(const size_t idx, oldContactHistory_type const & v) { oldContactHistory_[idx] = v; }
    
-   const newContactHistory_type& getNewContactHistory(const size_t idx) const {return newContactHistory_[idx];}
+   newContactHistory_type const & getNewContactHistory(const size_t idx) const {return newContactHistory_[idx];}
    newContactHistory_type& getNewContactHistoryRef(const size_t idx) {return newContactHistory_[idx];}
-   void setNewContactHistory(const size_t idx, const newContactHistory_type& v) { newContactHistory_[idx] = v; }
+   void setNewContactHistory(const size_t idx, newContactHistory_type const & v) { newContactHistory_[idx] = v; }
    
-   const temperature_type& getTemperature(const size_t idx) const {return temperature_[idx];}
+   temperature_type const & getTemperature(const size_t idx) const {return temperature_[idx];}
    temperature_type& getTemperatureRef(const size_t idx) {return temperature_[idx];}
-   void setTemperature(const size_t idx, const temperature_type& v) { temperature_[idx] = v; }
+   void setTemperature(const size_t idx, temperature_type const & v) { temperature_[idx] = v; }
    
-   const heatFlux_type& getHeatFlux(const size_t idx) const {return heatFlux_[idx];}
+   heatFlux_type const & getHeatFlux(const size_t idx) const {return heatFlux_[idx];}
    heatFlux_type& getHeatFluxRef(const size_t idx) {return heatFlux_[idx];}
-   void setHeatFlux(const size_t idx, const heatFlux_type& v) { heatFlux_[idx] = v; }
+   void setHeatFlux(const size_t idx, heatFlux_type const & v) { heatFlux_[idx] = v; }
    
-   const dv_type& getDv(const size_t idx) const {return dv_[idx];}
+   dv_type const & getDv(const size_t idx) const {return dv_[idx];}
    dv_type& getDvRef(const size_t idx) {return dv_[idx];}
-   void setDv(const size_t idx, const dv_type& v) { dv_[idx] = v; }
+   void setDv(const size_t idx, dv_type const & v) { dv_[idx] = v; }
    
-   const dw_type& getDw(const size_t idx) const {return dw_[idx];}
+   dw_type const & getDw(const size_t idx) const {return dw_[idx];}
    dw_type& getDwRef(const size_t idx) {return dw_[idx];}
-   void setDw(const size_t idx, const dw_type& v) { dw_[idx] = v; }
+   void setDw(const size_t idx, dw_type const & v) { dw_[idx] = v; }
    
-   const neighborState_type& getNeighborState(const size_t idx) const {return neighborState_[idx];}
+   neighborState_type const & getNeighborState(const size_t idx) const {return neighborState_[idx];}
    neighborState_type& getNeighborStateRef(const size_t idx) {return neighborState_[idx];}
-   void setNeighborState(const size_t idx, const neighborState_type& v) { neighborState_[idx] = v; }
+   void setNeighborState(const size_t idx, neighborState_type const & v) { neighborState_[idx] = v; }
    
 
    /**
@@ -480,6 +491,7 @@ public:
    std::vector<force_type> force_ {};
    std::vector<oldForce_type> oldForce_ {};
    std::vector<oldTorque_type> oldTorque_ {};
+   std::vector<currentBlock_type> currentBlock_ {};
    std::vector<type_type> type_ {};
    std::vector<nextParticle_type> nextParticle_ {};
    std::vector<oldContactHistory_type> oldContactHistory_ {};
@@ -513,6 +525,7 @@ ParticleStorage::Particle& ParticleStorage::Particle::operator=(const ParticleSt
    getForceRef() = rhs.getForce();
    getOldForceRef() = rhs.getOldForce();
    getOldTorqueRef() = rhs.getOldTorque();
+   getCurrentBlockRef() = rhs.getCurrentBlock();
    getTypeRef() = rhs.getType();
    getNextParticleRef() = rhs.getNextParticle();
    getOldContactHistoryRef() = rhs.getOldContactHistory();
@@ -543,6 +556,7 @@ ParticleStorage::Particle& ParticleStorage::Particle::operator=(ParticleStorage:
    getForceRef() = std::move(rhs.getForceRef());
    getOldForceRef() = std::move(rhs.getOldForceRef());
    getOldTorqueRef() = std::move(rhs.getOldTorqueRef());
+   getCurrentBlockRef() = std::move(rhs.getCurrentBlockRef());
    getTypeRef() = std::move(rhs.getTypeRef());
    getNextParticleRef() = std::move(rhs.getNextParticleRef());
    getOldContactHistoryRef() = std::move(rhs.getOldContactHistoryRef());
@@ -574,6 +588,7 @@ void swap(ParticleStorage::Particle lhs, ParticleStorage::Particle rhs)
    std::swap(lhs.getForceRef(), rhs.getForceRef());
    std::swap(lhs.getOldForceRef(), rhs.getOldForceRef());
    std::swap(lhs.getOldTorqueRef(), rhs.getOldTorqueRef());
+   std::swap(lhs.getCurrentBlockRef(), rhs.getCurrentBlockRef());
    std::swap(lhs.getTypeRef(), rhs.getTypeRef());
    std::swap(lhs.getNextParticleRef(), rhs.getNextParticleRef());
    std::swap(lhs.getOldContactHistoryRef(), rhs.getOldContactHistoryRef());
@@ -605,6 +620,7 @@ std::ostream& operator<<( std::ostream& os, const ParticleStorage::Particle& p )
          "force               : " << p.getForce() << "\n" <<
          "oldForce            : " << p.getOldForce() << "\n" <<
          "oldTorque           : " << p.getOldTorque() << "\n" <<
+         "currentBlock        : " << p.getCurrentBlock() << "\n" <<
          "type                : " << p.getType() << "\n" <<
          "nextParticle        : " << p.getNextParticle() << "\n" <<
          "oldContactHistory   : " << p.getOldContactHistory() << "\n" <<
@@ -706,6 +722,7 @@ inline ParticleStorage::iterator ParticleStorage::create(const id_t& uid)
    force_.emplace_back(real_t(0));
    oldForce_.emplace_back(real_t(0));
    oldTorque_.emplace_back(real_t(0));
+   currentBlock_.emplace_back(nullptr);
    type_.emplace_back(0);
    nextParticle_.emplace_back(-1);
    oldContactHistory_.emplace_back();
@@ -762,6 +779,7 @@ inline ParticleStorage::iterator ParticleStorage::erase(iterator& it)
    force_.pop_back();
    oldForce_.pop_back();
    oldTorque_.pop_back();
+   currentBlock_.pop_back();
    type_.pop_back();
    nextParticle_.pop_back();
    oldContactHistory_.pop_back();
@@ -805,6 +823,7 @@ inline void ParticleStorage::reserve(const size_t size)
    force_.reserve(size);
    oldForce_.reserve(size);
    oldTorque_.reserve(size);
+   currentBlock_.reserve(size);
    type_.reserve(size);
    nextParticle_.reserve(size);
    oldContactHistory_.reserve(size);
@@ -833,6 +852,7 @@ inline void ParticleStorage::clear()
    force_.clear();
    oldForce_.clear();
    oldTorque_.clear();
+   currentBlock_.clear();
    type_.clear();
    nextParticle_.clear();
    oldContactHistory_.clear();
@@ -862,6 +882,7 @@ inline size_t ParticleStorage::size() const
    //WALBERLA_ASSERT_EQUAL( uid_.size(), force.size() );
    //WALBERLA_ASSERT_EQUAL( uid_.size(), oldForce.size() );
    //WALBERLA_ASSERT_EQUAL( uid_.size(), oldTorque.size() );
+   //WALBERLA_ASSERT_EQUAL( uid_.size(), currentBlock.size() );
    //WALBERLA_ASSERT_EQUAL( uid_.size(), type.size() );
    //WALBERLA_ASSERT_EQUAL( uid_.size(), nextParticle.size() );
    //WALBERLA_ASSERT_EQUAL( uid_.size(), oldContactHistory.size() );
@@ -1061,7 +1082,7 @@ public:
    using return_type = walberla::id_t;
    walberla::id_t& operator()(data::Particle& p) const {return p.getUidRef();}
    walberla::id_t& operator()(data::Particle&& p) const {return p.getUidRef();}
-   const walberla::id_t& operator()(const data::Particle& p) const {return p.getUid();}
+   walberla::id_t const & operator()(const data::Particle& p) const {return p.getUid();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticlePosition
@@ -1070,7 +1091,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getPositionRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getPositionRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getPosition();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getPosition();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleInteractionRadius
@@ -1079,7 +1100,7 @@ public:
    using return_type = walberla::real_t;
    walberla::real_t& operator()(data::Particle& p) const {return p.getInteractionRadiusRef();}
    walberla::real_t& operator()(data::Particle&& p) const {return p.getInteractionRadiusRef();}
-   const walberla::real_t& operator()(const data::Particle& p) const {return p.getInteractionRadius();}
+   walberla::real_t const & operator()(const data::Particle& p) const {return p.getInteractionRadius();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleFlags
@@ -1088,7 +1109,7 @@ public:
    using return_type = walberla::mesa_pd::data::particle_flags::FlagT;
    walberla::mesa_pd::data::particle_flags::FlagT& operator()(data::Particle& p) const {return p.getFlagsRef();}
    walberla::mesa_pd::data::particle_flags::FlagT& operator()(data::Particle&& p) const {return p.getFlagsRef();}
-   const walberla::mesa_pd::data::particle_flags::FlagT& operator()(const data::Particle& p) const {return p.getFlags();}
+   walberla::mesa_pd::data::particle_flags::FlagT const & operator()(const data::Particle& p) const {return p.getFlags();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleOwner
@@ -1097,7 +1118,7 @@ public:
    using return_type = int;
    int& operator()(data::Particle& p) const {return p.getOwnerRef();}
    int& operator()(data::Particle&& p) const {return p.getOwnerRef();}
-   const int& operator()(const data::Particle& p) const {return p.getOwner();}
+   int const & operator()(const data::Particle& p) const {return p.getOwner();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleGhostOwners
@@ -1106,7 +1127,7 @@ public:
    using return_type = std::unordered_set<walberla::mpi::MPIRank>;
    std::unordered_set<walberla::mpi::MPIRank>& operator()(data::Particle& p) const {return p.getGhostOwnersRef();}
    std::unordered_set<walberla::mpi::MPIRank>& operator()(data::Particle&& p) const {return p.getGhostOwnersRef();}
-   const std::unordered_set<walberla::mpi::MPIRank>& operator()(const data::Particle& p) const {return p.getGhostOwners();}
+   std::unordered_set<walberla::mpi::MPIRank> const & operator()(const data::Particle& p) const {return p.getGhostOwners();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleShapeID
@@ -1115,7 +1136,7 @@ public:
    using return_type = size_t;
    size_t& operator()(data::Particle& p) const {return p.getShapeIDRef();}
    size_t& operator()(data::Particle&& p) const {return p.getShapeIDRef();}
-   const size_t& operator()(const data::Particle& p) const {return p.getShapeID();}
+   size_t const & operator()(const data::Particle& p) const {return p.getShapeID();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleRotation
@@ -1124,7 +1145,7 @@ public:
    using return_type = walberla::mesa_pd::Rot3;
    walberla::mesa_pd::Rot3& operator()(data::Particle& p) const {return p.getRotationRef();}
    walberla::mesa_pd::Rot3& operator()(data::Particle&& p) const {return p.getRotationRef();}
-   const walberla::mesa_pd::Rot3& operator()(const data::Particle& p) const {return p.getRotation();}
+   walberla::mesa_pd::Rot3 const & operator()(const data::Particle& p) const {return p.getRotation();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleAngularVelocity
@@ -1133,7 +1154,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getAngularVelocityRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getAngularVelocityRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getAngularVelocity();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getAngularVelocity();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleTorque
@@ -1142,7 +1163,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getTorqueRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getTorqueRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getTorque();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getTorque();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleLinearVelocity
@@ -1151,7 +1172,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getLinearVelocityRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getLinearVelocityRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getLinearVelocity();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getLinearVelocity();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleInvMass
@@ -1160,7 +1181,7 @@ public:
    using return_type = walberla::real_t;
    walberla::real_t& operator()(data::Particle& p) const {return p.getInvMassRef();}
    walberla::real_t& operator()(data::Particle&& p) const {return p.getInvMassRef();}
-   const walberla::real_t& operator()(const data::Particle& p) const {return p.getInvMass();}
+   walberla::real_t const & operator()(const data::Particle& p) const {return p.getInvMass();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleForce
@@ -1169,7 +1190,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getForceRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getForceRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getForce();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getForce();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleOldForce
@@ -1178,7 +1199,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getOldForceRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getOldForceRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getOldForce();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getOldForce();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleOldTorque
@@ -1187,7 +1208,16 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getOldTorqueRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getOldTorqueRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getOldTorque();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getOldTorque();}
+};
+///Predicate that selects the currentBlock property (a blockforest::Block pointer) from a Particle
+class SelectParticleCurrentBlock
+{
+public:
+   using return_type = blockforest::Block*; ///< Type of the selected property.
+   blockforest::Block*& operator()(data::Particle& p) const {return p.getCurrentBlockRef();} ///< Mutable access, forwarded to Particle::getCurrentBlockRef().
+   blockforest::Block*& operator()(data::Particle&& p) const {return p.getCurrentBlockRef();} ///< Same as above for a Particle passed as an rvalue.
+   blockforest::Block* const & operator()(const data::Particle& p) const {return p.getCurrentBlock();} ///< Read-only access, forwarded to Particle::getCurrentBlock().
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleType
@@ -1196,7 +1226,7 @@ public:
    using return_type = uint_t;
    uint_t& operator()(data::Particle& p) const {return p.getTypeRef();}
    uint_t& operator()(data::Particle&& p) const {return p.getTypeRef();}
-   const uint_t& operator()(const data::Particle& p) const {return p.getType();}
+   uint_t const & operator()(const data::Particle& p) const {return p.getType();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleNextParticle
@@ -1205,7 +1235,7 @@ public:
    using return_type = int;
    int& operator()(data::Particle& p) const {return p.getNextParticleRef();}
    int& operator()(data::Particle&& p) const {return p.getNextParticleRef();}
-   const int& operator()(const data::Particle& p) const {return p.getNextParticle();}
+   int const & operator()(const data::Particle& p) const {return p.getNextParticle();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleOldContactHistory
@@ -1214,7 +1244,7 @@ public:
    using return_type = std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>;
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& operator()(data::Particle& p) const {return p.getOldContactHistoryRef();}
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& operator()(data::Particle&& p) const {return p.getOldContactHistoryRef();}
-   const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& operator()(const data::Particle& p) const {return p.getOldContactHistory();}
+   std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & operator()(const data::Particle& p) const {return p.getOldContactHistory();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleNewContactHistory
@@ -1223,7 +1253,7 @@ public:
    using return_type = std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>;
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& operator()(data::Particle& p) const {return p.getNewContactHistoryRef();}
    std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& operator()(data::Particle&& p) const {return p.getNewContactHistoryRef();}
-   const std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory>& operator()(const data::Particle& p) const {return p.getNewContactHistory();}
+   std::map<walberla::id_t, walberla::mesa_pd::data::ContactHistory> const & operator()(const data::Particle& p) const {return p.getNewContactHistory();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleTemperature
@@ -1232,7 +1262,7 @@ public:
    using return_type = walberla::real_t;
    walberla::real_t& operator()(data::Particle& p) const {return p.getTemperatureRef();}
    walberla::real_t& operator()(data::Particle&& p) const {return p.getTemperatureRef();}
-   const walberla::real_t& operator()(const data::Particle& p) const {return p.getTemperature();}
+   walberla::real_t const & operator()(const data::Particle& p) const {return p.getTemperature();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleHeatFlux
@@ -1241,7 +1271,7 @@ public:
    using return_type = walberla::real_t;
    walberla::real_t& operator()(data::Particle& p) const {return p.getHeatFluxRef();}
    walberla::real_t& operator()(data::Particle&& p) const {return p.getHeatFluxRef();}
-   const walberla::real_t& operator()(const data::Particle& p) const {return p.getHeatFlux();}
+   walberla::real_t const & operator()(const data::Particle& p) const {return p.getHeatFlux();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleDv
@@ -1250,7 +1280,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getDvRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getDvRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getDv();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getDv();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleDw
@@ -1259,7 +1289,7 @@ public:
    using return_type = walberla::mesa_pd::Vec3;
    walberla::mesa_pd::Vec3& operator()(data::Particle& p) const {return p.getDwRef();}
    walberla::mesa_pd::Vec3& operator()(data::Particle&& p) const {return p.getDwRef();}
-   const walberla::mesa_pd::Vec3& operator()(const data::Particle& p) const {return p.getDw();}
+   walberla::mesa_pd::Vec3 const & operator()(const data::Particle& p) const {return p.getDw();}
 };
 ///Predicate that selects a certain property from a Particle
 class SelectParticleNeighborState
@@ -1268,7 +1298,7 @@ public:
    using return_type = std::unordered_set<walberla::mpi::MPIRank>;
    std::unordered_set<walberla::mpi::MPIRank>& operator()(data::Particle& p) const {return p.getNeighborStateRef();}
    std::unordered_set<walberla::mpi::MPIRank>& operator()(data::Particle&& p) const {return p.getNeighborStateRef();}
-   const std::unordered_set<walberla::mpi::MPIRank>& operator()(const data::Particle& p) const {return p.getNeighborState();}
+   std::unordered_set<walberla::mpi::MPIRank> const & operator()(const data::Particle& p) const {return p.getNeighborState();}
 };
 
 } //namespace data
diff --git a/src/mesa_pd/data/SparseLinkedCells.h b/src/mesa_pd/data/SparseLinkedCells.h
new file mode 100644
index 0000000000000000000000000000000000000000..7185c2988b41d7b4b3f0fe78290d0c62a3d68dd7
--- /dev/null
+++ b/src/mesa_pd/data/SparseLinkedCells.h
@@ -0,0 +1,400 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SparseLinkedCells.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/ParticleStorage.h>
+
+#include <core/Abort.h>
+#include <core/debug/Debug.h>
+#include <core/math/AABB.h>
+#include <stencil/D3Q27.h>
+
+#include <atomic>
+#include <cmath>
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace data {
+
+struct SparseLinkedCells
+{
+   SparseLinkedCells(const math::AABB& domain, const real_t cellDiameter)
+      : SparseLinkedCells(domain, Vec3(cellDiameter,cellDiameter,cellDiameter))
+   {}
+   SparseLinkedCells(const math::AABB& domain, const Vec3& cellDiameter);
+   /// Resets all cells listed in nonEmptyCells_ and the list of infinite particles to empty.
+   void clear();
+
+   /**
+    * Calls the provided functor \p func for all particle pairs accepted by \p selector.
+    *
+    * Additional arguments can be provided. No pairs with twice the same particle. Every pair is passed in both orders (i,j) and (j,i).
+    * Call syntax for the provided functor
+    * \code
+    * func( i, j, std::forward<Args>(args)... );
+    * \endcode
+    * \param openmp currently ignored, the pairs are processed sequentially
+    */
+   template <typename Selector, typename Accessor, typename Func, typename... Args>
+   void forEachParticlePair(const bool openmp,
+                            const Selector& selector,
+                            Accessor& acForLC,
+                            Func&& func,
+                            Args&&... args) const;
+   /**
+    * Calls the provided functor \p func for all particle pairs accepted by \p selector.
+    *
+    * Additional arguments can be provided. No pairs with twice the same particle are generated.
+    * No pair is called twice!
+    * Call syntax for the provided functor
+    * \code
+    * func( i, j, std::forward<Args>(args)... );
+    * \endcode
+    * \param openmp currently ignored, the pairs are processed sequentially
+    */
+   template <typename Selector, typename Accessor, typename Func, typename... Args>
+   void forEachParticlePairHalf(const bool openmp,
+                                const Selector& selector,
+                                Accessor& acForLC,
+                                Func&& func,
+                                Args&&... args) const;
+
+   math::AABB   domain_ {}; ///< local domain covered by this data structure
+   Vector3<int> numCellsPerDim_ {}; ///< number of linked cells per dimension
+   Vec3         cellDiameter_ {}; ///< actual cell size per dimension, never larger than the requested one
+   Vec3         invCellDiameter_ {}; ///< component-wise reciprocal of cellDiameter_
+   std::atomic<int> infiniteParticles_ {}; ///< head of the list of particles too large for the cells (flagged INFINITE), -1 ends the list
+   std::vector< std::atomic<int> > cells_ {}; ///< actual cell data structure: index of the first particle per cell, -1 for an empty cell
+   std::vector<size_t> nonEmptyCells_ {}; ///< list of cells containing particles
+};
+
+inline
+math::AABB getCellAABB(const SparseLinkedCells& ll,
+                       const int64_t hash0,
+                       const int64_t hash1,
+                       const int64_t hash2)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0);
+   WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash1, 0);
+   WALBERLA_ASSERT_LESS(hash1, ll.numCellsPerDim_[1]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash2, 0);
+   WALBERLA_ASSERT_LESS(hash2, ll.numCellsPerDim_[2]);
+   const auto& minCorner = ll.domain_.minCorner();
+   const real_t xMin = ll.cellDiameter_[0] * real_c(hash0) + minCorner[0];
+   const real_t yMin = ll.cellDiameter_[1] * real_c(hash1) + minCorner[1];
+   const real_t zMin = ll.cellDiameter_[2] * real_c(hash2) + minCorner[2];
+   const real_t xMax = ll.cellDiameter_[0] * real_c(hash0 + 1) + minCorner[0];
+   const real_t yMax = ll.cellDiameter_[1] * real_c(hash1 + 1) + minCorner[1];
+   const real_t zMax = ll.cellDiameter_[2] * real_c(hash2 + 1) + minCorner[2];
+   return math::AABB(xMin, yMin, zMin, xMax, yMax, zMax);
+}
+
+inline
+uint_t getCellIdx(const SparseLinkedCells& ll,
+                  const int64_t hash0,
+                  const int64_t hash1,
+                  const int64_t hash2)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0);
+   WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash1, 0);
+   WALBERLA_ASSERT_LESS(hash1, ll.numCellsPerDim_[1]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash2, 0);
+   WALBERLA_ASSERT_LESS(hash2, ll.numCellsPerDim_[2]);
+   return uint_c(hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] + hash1 * ll.numCellsPerDim_[0] + hash0);
+}
+
+inline
+void getCellCoordinates(const SparseLinkedCells& ll,
+                        const uint64_t idx,
+                        int64_t& hash0,
+                        int64_t& hash1,
+                        int64_t& hash2)
+{
+   hash2 = int64_c(idx) / (ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0]);
+   hash1 = (int64_c(idx) - (hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0])) / (ll.numCellsPerDim_[0]);
+   hash0 = int64_c(idx) - hash2 * ll.numCellsPerDim_[1] * ll.numCellsPerDim_[0] - hash1 * ll.numCellsPerDim_[0];
+
+   WALBERLA_ASSERT_GREATER_EQUAL(hash0, 0);
+   WALBERLA_ASSERT_LESS(hash0, ll.numCellsPerDim_[0]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash1, 0);
+   WALBERLA_ASSERT_LESS(hash1, ll.numCellsPerDim_[1]);
+   WALBERLA_ASSERT_GREATER_EQUAL(hash2, 0);
+   WALBERLA_ASSERT_LESS(hash2, ll.numCellsPerDim_[2]);
+}
+
+/// Builds the cell grid for \p domain. \p cellDiameter is an upper bound: cells are shrunk until an integer number of them fits each dimension.
+inline SparseLinkedCells::SparseLinkedCells(const math::AABB& domain, const Vec3& cellDiameter)
+   : domain_(domain)
+   , numCellsPerDim_( static_cast<int>(std::ceil( domain.sizes()[0] / cellDiameter[0])),
+     static_cast<int>(std::ceil( domain.sizes()[1] / cellDiameter[1])),
+     static_cast<int>(std::ceil( domain.sizes()[2] / cellDiameter[2])) )
+   , cellDiameter_( domain.sizes()[0] / real_c(numCellsPerDim_[0]),
+     domain.sizes()[1] / real_c(numCellsPerDim_[1]),
+     domain.sizes()[2] / real_c(numCellsPerDim_[2]) )
+   , invCellDiameter_( real_t(1) / cellDiameter_[0], real_t(1) / cellDiameter_[1], real_t(1) / cellDiameter_[2] )
+   , infiniteParticles_(-1) // -1 terminates the list; the default member initializer yields 0, i.e. a valid particle index
+   , cells_(uint_c(numCellsPerDim_[0]*numCellsPerDim_[1]*numCellsPerDim_[2]))
+   , nonEmptyCells_(uint_c(numCellsPerDim_[0]*numCellsPerDim_[1]*numCellsPerDim_[2]))
+{
+   //precondition
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter[0], real_t(0));
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter[1], real_t(0));
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter[2], real_t(0));
+
+   //postcondition
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter_[0], real_t(0));
+   WALBERLA_CHECK_LESS_EQUAL(cellDiameter_[0], cellDiameter[0]);
+   WALBERLA_CHECK_GREATER_EQUAL(numCellsPerDim_[0], 0);
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter_[1], real_t(0));
+   WALBERLA_CHECK_LESS_EQUAL(cellDiameter_[1], cellDiameter[1]);
+   WALBERLA_CHECK_GREATER_EQUAL(numCellsPerDim_[1], 0);
+   WALBERLA_CHECK_GREATER_EQUAL(cellDiameter_[2], real_t(0));
+   WALBERLA_CHECK_LESS_EQUAL(cellDiameter_[2], cellDiameter[2]);
+   WALBERLA_CHECK_GREATER_EQUAL(numCellsPerDim_[2], 0);
+
+   // the sized construction only pre-allocated storage for the worst case, no cell is occupied yet
+   nonEmptyCells_.clear();
+   // -1 marks an empty cell
+   std::fill(cells_.begin(), cells_.end(), -1);
+}
+
+inline void SparseLinkedCells::clear() // inline: defined in a header (ODR); resets occupied cells and both list heads to -1
+{
+   for (const auto v : nonEmptyCells_)
+   {
+      WALBERLA_ASSERT_LESS(v, cells_.size());
+      cells_[v] = -1;
+   }
+   nonEmptyCells_.clear();
+   infiniteParticles_ = -1;
+}
+template <typename Selector, typename Accessor, typename Func, typename... Args>
+inline void SparseLinkedCells::forEachParticlePair(const bool openmp, const Selector& selector, Accessor& acForLC, Func&& func, Args&&... args) const
+{
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+   WALBERLA_UNUSED(openmp);
+
+   for (const auto cellIdx : nonEmptyCells_)
+   {
+      int64_t x = 0;
+      int64_t y = 0;
+      int64_t z = 0;
+      getCellCoordinates(*this, cellIdx, x, y, z);
+      int p_idx = cells_[cellIdx]; ///< current particle index
+      int np_idx = -1; ///< particle to be checked against
+
+      while (p_idx != -1)
+      {
+         WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
+         WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
+
+         // check particles in own cell
+         np_idx = acForLC.getNextParticle(uint_c(p_idx)); ///< neighbor particle index
+         while (np_idx != -1)
+         {
+            WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+            WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+            if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+            {
+               func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+               func(uint_c(np_idx), uint_c(p_idx), std::forward<Args>(args)...);
+            }
+
+            // go to next particle
+            np_idx = acForLC.getNextParticle(uint_c(np_idx));
+         }
+
+         // check particles in infiniteParticles list
+         np_idx = infiniteParticles_; ///< neighbor particle index
+         while (np_idx != -1)
+         {
+            WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+            WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+            if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+            {
+               func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+               func(uint_c(np_idx), uint_c(p_idx), std::forward<Args>(args)...);
+            }
+
+            // go to next particle
+            np_idx = acForLC.getNextParticle(uint_c(np_idx));
+         }
+
+         // go to next particle
+         p_idx = acForLC.getNextParticle(uint_c(p_idx));
+      }
+
+      // check particles in neighboring cells (only positive ones)
+      for (auto dir : stencil::D3Q27::dir_pos)
+      {
+         const int64_t nx = x + int64_c(stencil::cx[dir]);
+         const int64_t ny = y + int64_c(stencil::cy[dir]);
+         const int64_t nz = z + int64_c(stencil::cz[dir]);
+         if (nx < 0) continue;
+         if (ny < 0) continue;
+         if (nz < 0) continue;
+         if (nx >= numCellsPerDim_[0]) continue;
+         if (ny >= numCellsPerDim_[1]) continue;
+         if (nz >= numCellsPerDim_[2]) continue;
+
+         const uint64_t ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
+
+         p_idx = cells_[cellIdx]; ///< current particle index
+         WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
+         WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
+         while (p_idx != -1)
+         {
+            np_idx = cells_[ncell_idx]; ///< neighbor particle index
+            while (np_idx != -1)
+            {
+               WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+               WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+               if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+               {
+                  func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+                  func(uint_c(np_idx), uint_c(p_idx), std::forward<Args>(args)...);
+               }
+
+               // go to next particle
+               np_idx = acForLC.getNextParticle(uint_c(np_idx));
+            }
+
+            // go to next particle
+            p_idx = acForLC.getNextParticle(uint_c(p_idx));
+         }
+      }
+   }
+}
+template <typename Selector, typename Accessor, typename Func, typename... Args>
+inline void SparseLinkedCells::forEachParticlePairHalf(const bool openmp, const Selector& selector, Accessor& acForLC, Func&& func, Args&&... args) const
+{
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+   WALBERLA_UNUSED(openmp);
+
+   for (const auto cellIdx : nonEmptyCells_)
+   {
+      int64_t x = 0;
+      int64_t y = 0;
+      int64_t z = 0;
+      getCellCoordinates(*this, cellIdx, x, y, z);
+      int p_idx = cells_[cellIdx]; ///< current particle index
+      int np_idx = -1; ///< particle to be checked against
+
+      while (p_idx != -1)
+      {
+         WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
+         WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
+
+         // check particles in own cell
+         np_idx = acForLC.getNextParticle(uint_c(p_idx)); ///< neighbor particle index
+         while (np_idx != -1)
+         {
+            WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+            WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+            if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+            {
+               func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+            }
+
+            // go to next particle
+            np_idx = acForLC.getNextParticle(uint_c(np_idx));
+         }
+
+         // check particles in infiniteParticles list
+         np_idx = infiniteParticles_; ///< neighbor particle index
+         while (np_idx != -1)
+         {
+            WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+            WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+            if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+            {
+               func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+            }
+
+            // go to next particle
+            np_idx = acForLC.getNextParticle(uint_c(np_idx));
+         }
+
+         // go to next particle
+         p_idx = acForLC.getNextParticle(uint_c(p_idx));
+      }
+
+      // check particles in neighboring cells (only positive ones)
+      for (auto dir : stencil::D3Q27::dir_pos)
+      {
+         const int64_t nx = x + int64_c(stencil::cx[dir]);
+         const int64_t ny = y + int64_c(stencil::cy[dir]);
+         const int64_t nz = z + int64_c(stencil::cz[dir]);
+         if (nx < 0) continue;
+         if (ny < 0) continue;
+         if (nz < 0) continue;
+         if (nx >= numCellsPerDim_[0]) continue;
+         if (ny >= numCellsPerDim_[1]) continue;
+         if (nz >= numCellsPerDim_[2]) continue;
+
+         const uint64_t ncell_idx = getCellIdx(*this, nx, ny, nz); ///< neighbor cell index
+
+         p_idx = cells_[cellIdx]; ///< current particle index
+         WALBERLA_ASSERT_GREATER_EQUAL(p_idx, 0);
+         WALBERLA_ASSERT_LESS(p_idx, acForLC.size());
+         while (p_idx != -1)
+         {
+            np_idx = cells_[ncell_idx]; ///< neighbor particle index
+            while (np_idx != -1)
+            {
+               WALBERLA_ASSERT_GREATER_EQUAL(np_idx, 0);
+               WALBERLA_ASSERT_LESS(np_idx, acForLC.size());
+
+               if (selector(uint_c(p_idx), uint_c(np_idx), acForLC))
+               {
+                  func(uint_c(p_idx), uint_c(np_idx), std::forward<Args>(args)...);
+               }
+
+               // go to next particle
+               np_idx = acForLC.getNextParticle(uint_c(np_idx));
+            }
+
+            // go to next particle
+            p_idx = acForLC.getNextParticle(uint_c(p_idx));
+         }
+      }
+   }
+}
+
+} //namespace data
+} //namespace mesa_pd
+} //namespace walberla
\ No newline at end of file
diff --git a/src/mesa_pd/domain/BlockForestDomain.cpp b/src/mesa_pd/domain/BlockForestDomain.cpp
index 4339671d40ed67464635cd3957c2bd028970b6c7..12e341fd98c58bc6b312d593bea9c569b398c12b 100644
--- a/src/mesa_pd/domain/BlockForestDomain.cpp
+++ b/src/mesa_pd/domain/BlockForestDomain.cpp
@@ -32,6 +32,12 @@ namespace domain {
 /// \post neighborSubdomains_ is sorted by rank
 BlockForestDomain::BlockForestDomain(const std::shared_ptr<blockforest::BlockForest>& blockForest)
    : blockForest_(blockForest)
+{
+   refresh();
+}
+
+/// \post neighborSubdomains_ is sorted by rank
+void BlockForestDomain::refresh()
 {
    ownRank_ = mpi::MPIManager::instance()->rank();
 
@@ -41,6 +47,9 @@ BlockForestDomain::BlockForestDomain(const std::shared_ptr<blockforest::BlockFor
 
    if (blockForest_->empty()) return;
 
+   localAABBs_.clear();
+   neighborSubdomains_.clear();
+   neighborProcesses_.clear();
    unionOfLocalAABBs_ = blockForest_->begin()->getAABB();
    for (auto& iBlk : *blockForest_)
    {
diff --git a/src/mesa_pd/domain/BlockForestDomain.h b/src/mesa_pd/domain/BlockForestDomain.h
index 897b56348a8b2874a9384bb0f99fdd3ce982a914..e4d01fbb9863317c249c4f20b0aad0d54a523de0 100644
--- a/src/mesa_pd/domain/BlockForestDomain.h
+++ b/src/mesa_pd/domain/BlockForestDomain.h
@@ -35,6 +35,14 @@ class BlockForestDomain : public IDomain
 public:
    BlockForestDomain(const std::shared_ptr<blockforest::BlockForest>& blockForest);
 
+   /**
+    * @brief If the BlockForest is changed this function has to be called in order to
+    * update all internal caches!
+    *
+    * Updates the local caches for local and neighbor AABBs.
+    */
+   void refresh();
+
    bool   isContainedInProcessSubdomain(const uint_t rank, const Vec3& pt) const override;
    bool   isContainedInLocalSubdomain(const Vec3& pt, const real_t& radius) const override;
    /// Is the sphere defined by \p pt and \p radius completely inside the local subdomin?
@@ -48,6 +56,7 @@ public:
    bool   intersectsWithProcessSubdomain(const uint_t rank, const Vec3& pt, const real_t& radius) const override;
    void   correctParticlePosition(Vec3& pt) const override;
 
+   const math::AABB& getUnionOfLocalAABBs() const {return unionOfLocalAABBs_;}
    size_t getNumLocalAABBs() const {return localAABBs_.size();}
    size_t getNumNeighborSubdomains() const {return neighborSubdomains_.size();}
    size_t getNumNeighborProcesses() const {return neighborProcesses_.size();}
diff --git a/src/mesa_pd/domain/InfoCollection.h b/src/mesa_pd/domain/InfoCollection.h
index d247e65df2f019f154fed83a4b89159c229b3a85..be0d280ffdcc3125bc9c0d545324fd7ef4ead20b 100644
--- a/src/mesa_pd/domain/InfoCollection.h
+++ b/src/mesa_pd/domain/InfoCollection.h
@@ -36,7 +36,7 @@ void createWithNeighborhood(Accessor& ac, const BlockForest& bf, pe::InfoCollect
 {
    ic.clear();
 
-   mpi::BufferSystem bs( MPIManager::instance()->comm(), 756 );
+   walberla::mpi::BufferSystem bs( MPIManager::instance()->comm(), 756 );
 
    for (size_t idx = 0; idx < ac.size(); ++idx)
    {
@@ -55,23 +55,24 @@ void createWithNeighborhood(Accessor& ac, const BlockForest& bf, pe::InfoCollect
                ++info.computationalWeight;
             }
 
-         }
-
-         for (uint_t branchID = 0; branchID < 8; ++branchID)
-         {
-            const auto childID   = BlockID(block->getId(), branchID);
-            const auto childAABB = bf.getAABBFromBlockId(childID);
-            pe::BlockInfo& childInfo = ic[childID];
-            if (childAABB.contains(ac.getPosition(idx)))
+            for (uint_t branchID = 0; branchID < 8; ++branchID)
             {
-               if (data::particle_flags::isSet( ac.getFlags(idx), data::particle_flags::GHOST))
-               {
-                  ++childInfo.communicationWeight;
-               } else
+               const auto childID   = BlockID(block->getId(), branchID);
+               const auto childAABB = bf.getAABBFromBlockId(childID);
+               pe::BlockInfo& childInfo = ic[childID];
+               if (childAABB.contains(ac.getPosition(idx)))
                {
-                  ++childInfo.computationalWeight;
+                  if (data::particle_flags::isSet( ac.getFlags(idx), data::particle_flags::GHOST))
+                  {
+                     ++childInfo.communicationWeight;
+                  } else
+                  {
+                     ++childInfo.computationalWeight;
+                  }
+                  break; //particle can only be located within one child
                }
             }
+            break; //particle can only be located within one block
          }
       }
    }
diff --git a/src/mesa_pd/kernel/AssocToBlock.h b/src/mesa_pd/kernel/AssocToBlock.h
new file mode 100644
index 0000000000000000000000000000000000000000..9936d57c6ca9b1f6b905ab657b6a59738c4bdb38
--- /dev/null
+++ b/src/mesa_pd/kernel/AssocToBlock.h
@@ -0,0 +1,94 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file AssocToBlock.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+
+#include <blockforest/BlockForest.h>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+ * Kernel which updates the currentBlock property of all local particles.
+ * All particles are checked against the blocks in the BlockForest and the property
+ * is set accordingly.
+ *
+ * \attention This kernel must only be run on local particles. Ghost particles do not have
+ * a corresponding block!
+ * \post currentBlock property of all local particles is up-to-date.
+ * \ingroup mesa_pd_kernel
+ */
+class AssocToBlock
+{
+public:
+   explicit AssocToBlock(const std::shared_ptr<BlockForest>& bf) : bf_(bf) {}
+
+   template <typename Accessor>
+   void operator()(const size_t i, Accessor& ac) const;
+private:
+   std::shared_ptr<BlockForest> bf_ = nullptr;
+};
+
+template <typename Accessor>
+inline void AssocToBlock::operator()(const size_t idx,
+                                     Accessor& ac) const
+{
+   blockforest::Block*& currentBlock = ac.getCurrentBlockRef(idx);
+
+   if (currentBlock != nullptr)
+   {
+      if (currentBlock->getAABB().contains(ac.getPosition(idx)))
+      {
+         return;
+      } else
+      {
+         currentBlock = nullptr;
+      }
+   }
+
+   for (auto& blk : bf_->getBlockMap())
+   {
+      if (blk.second->getAABB().contains(ac.getPosition(idx)))
+      {
+         currentBlock = blk.second.get();
+         return;
+      }
+   }
+
+   //cannot happen if called only for local particles!
+   //no "owning" block was found within the BlockForest...
+   if  (currentBlock == nullptr)
+   {
+      WALBERLA_LOG_DEVEL( ac.getPosition(idx) );
+      for (auto& blk : bf_->getBlockMap())
+      {
+         WALBERLA_LOG_DEVEL(blk.second->getAABB());
+      }
+   }
+   WALBERLA_CHECK_NOT_NULLPTR(currentBlock, ac.getPosition(idx));
+}
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/src/mesa_pd/kernel/InsertParticleIntoLinkedCells.h b/src/mesa_pd/kernel/InsertParticleIntoLinkedCells.h
index 8ec5b660a98c2575101573e439b094e6371dbb9a..6f693bce6f1c33f202fae455b9920258c176a364 100644
--- a/src/mesa_pd/kernel/InsertParticleIntoLinkedCells.h
+++ b/src/mesa_pd/kernel/InsertParticleIntoLinkedCells.h
@@ -81,8 +81,8 @@ inline void InsertParticleIntoLinkedCells::operator()(const size_t p_idx, Access
       if (hash1 >= lc.numCellsPerDim_[1]) hash1 = lc.numCellsPerDim_[1] - 1;
       if (hash2 < 0) hash2 = 0;
       if (hash2 >= lc.numCellsPerDim_[2]) hash2 = lc.numCellsPerDim_[2] - 1;
-      int cell_idx = getCellIdx(lc, hash0, hash1, hash2);
-      ac.setNextParticle(p_idx, lc.cells_[uint_c(cell_idx)].exchange(int_c(p_idx)));
+      uint_t cell_idx = getCellIdx(lc, hash0, hash1, hash2);
+      ac.setNextParticle(p_idx, lc.cells_[cell_idx].exchange(int_c(p_idx)));
    }
 }
 
diff --git a/src/mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.h b/src/mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.h
new file mode 100644
index 0000000000000000000000000000000000000000..a473a6fbbdbf6adf200ec208a7153cc20530f514
--- /dev/null
+++ b/src/mesa_pd/kernel/InsertParticleIntoSparseLinkedCells.h
@@ -0,0 +1,95 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InsertParticleIntoSparseLinkedCells.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/SparseLinkedCells.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+ * Inserts a particle into the data::SparseLinkedCells data structure
+ *
+ * \attention Make sure to data::SparseLinkedCells::clear() the data structure before
+ * reinserting new particles.
+ *
+ * This kernel requires the following particle accessor interface
+ * \code
+ * const walberla::mesa_pd::Vec3& getPosition(const size_t p_idx) const;
+ *
+ * const walberla::mesa_pd::data::particle_flags::FlagT& getFlags(const size_t p_idx) const;
+ *
+ * const size_t& getNextParticle(const size_t p_idx) const;
+ * void setNextParticle(const size_t p_idx, const size_t& v);
+ *
+ * \endcode
+ * \ingroup mesa_pd_kernel
+ */
+class InsertParticleIntoSparseLinkedCells
+{
+public:
+   template <typename Accessor>
+   void operator()(const size_t p_idx, Accessor& ac, data::SparseLinkedCells& lc) const;
+};
+
+template <typename Accessor>
+inline void InsertParticleIntoSparseLinkedCells::operator()(const size_t p_idx, Accessor& ac, data::SparseLinkedCells& lc) const
+{
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+   // Prepends particle p_idx to the list of its cell, or to the list of infinite particles if it is flagged INFINITE.
+   const auto& minCorner = lc.domain_.minCorner();
+   if (data::particle_flags::isSet(ac.getFlags(p_idx), data::particle_flags::INFINITE))
+   {
+      ac.setNextParticle(p_idx, lc.infiniteParticles_.exchange(int_c(p_idx))); // p_idx becomes the new list head, the previous head its successor
+   } else
+   {
+      int hash0 = static_cast<int>(std::floor((ac.getPosition(p_idx)[0] - minCorner[0]) * lc.invCellDiameter_[0])); // cell coordinates relative to the min corner of the domain
+      int hash1 = static_cast<int>(std::floor((ac.getPosition(p_idx)[1] - minCorner[1]) * lc.invCellDiameter_[1]));
+      int hash2 = static_cast<int>(std::floor((ac.getPosition(p_idx)[2] - minCorner[2]) * lc.invCellDiameter_[2]));
+      if (hash0 < 0) hash0 = 0; // positions outside of the domain are clamped to the boundary cells
+      if (hash0 >= lc.numCellsPerDim_[0]) hash0 = lc.numCellsPerDim_[0] - 1;
+      if (hash1 < 0) hash1 = 0;
+      if (hash1 >= lc.numCellsPerDim_[1]) hash1 = lc.numCellsPerDim_[1] - 1;
+      if (hash2 < 0) hash2 = 0;
+      if (hash2 >= lc.numCellsPerDim_[2]) hash2 = lc.numCellsPerDim_[2] - 1;
+      uint64_t cell_idx = getCellIdx(lc, hash0, hash1, hash2);
+      ac.setNextParticle(p_idx, lc.cells_[cell_idx].exchange(int_c(p_idx))); // p_idx becomes the new head of the cell's list
+      if (ac.getNextParticle(p_idx) == -1) // previous head was -1 -> the cell was empty until now
+      {
+         lc.nonEmptyCells_.emplace_back(cell_idx); // NOTE(review): plain vector append, not synchronized like the atomic exchange above - confirm this kernel is never called concurrently
+      }
+   }
+}
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
\ No newline at end of file
diff --git a/src/mesa_pd/mpi/SyncNextNeighborsBlockForest.cpp b/src/mesa_pd/mpi/SyncNextNeighborsBlockForest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..08bfc37ef2fe59ac0e128809d0d0bbdcd9ccf5a7
--- /dev/null
+++ b/src/mesa_pd/mpi/SyncNextNeighborsBlockForest.cpp
@@ -0,0 +1,298 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SyncNextNeighborsBlockForest.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#include "SyncNextNeighborsBlockForest.h"
+#include <mesa_pd/domain/BlockForestDomain.h>
+
+#include <mesa_pd/mpi/RemoveAndNotify.h>
+
+namespace walberla {
+namespace mesa_pd {
+namespace mpi {
+
+void correctParticlePosition(Vec3& pt,
+                             const Vec3& center,
+                             const math::AABB& domain,
+                             const std::array<bool, 3>& periodic)
+{
+   // Along every periodic axis, shift pt by one domain extent if it is more than half an extent away from center.
+   const Vec3 offset = pt - center; // evaluated once, before pt is modified
+   const auto wrapAxis = [&](const uint_t axis, const real_t extent) {
+      if (!periodic[axis]) return;
+      if (-extent * 0.5 > offset[axis]) pt[axis] += extent;
+      if (+extent * 0.5 < offset[axis]) pt[axis] -= extent;
+   };
+   wrapAxis(0, domain.xSize());
+   wrapAxis(1, domain.ySize());
+   wrapAxis(2, domain.zSize());
+}
+
+void SyncNextNeighborsBlockForest::operator()(data::ParticleStorage& ps,
+                                              const std::shared_ptr<blockforest::BlockForest>& bf,
+                                              const std::shared_ptr<domain::BlockForestDomain>& domain,
+                                              const real_t dx) const
+{
+   // nothing to do when running on a single process
+   if (numProcesses_ == 1) return;
+
+   // every call starts from a newly constructed buffer system
+   bs = walberla::mpi::BufferSystem( walberla::mpi::MPIManager::instance()->comm() );
+
+   // Touch the send buffer of every neighbor process of every block in the block map. Buffers that
+   // are still empty receive a dummy byte to force transmission.
+   for (auto& entry : bf->getBlockMap())
+   {
+      const auto& block = entry.second;
+      for (uint_t nb = uint_t(0); nb != block->getNeighborhoodSize(); ++nb)
+      {
+         auto& sendBuffer = bs.sendBuffer(block->getNeighborProcess(nb));
+         if (sendBuffer.isEmpty()) sendBuffer << walberla::uint8_c(0);
+      }
+   }
+
+   // assemble the synchronization messages for the particles in ps
+   generateSynchronizationMessages(ps, bf, dx);
+
+   // size of buffer is unknown and changes with each send
+   bs.setReceiverInfoFromSendBufferState(false, true);
+   bs.sendAll();
+
+   // Parse everything that was received from the connected processes.
+   WALBERLA_LOG_DETAIL( "Parsing of particle synchronization response starts..." );
+   ParseMessage parseMessage;
+   for( auto recvIt = bs.begin(); recvIt != bs.end(); ++recvIt )
+   {
+      walberla::uint8_t dummy;
+      recvIt.buffer() >> dummy; // consume the leading byte (the dummy byte written on the send side)
+      while( !recvIt.buffer().isEmpty() )
+         parseMessage(recvIt.rank(), recvIt.buffer(), ps, *domain);
+   }
+   WALBERLA_LOG_DETAIL( "Parsing of particle synchronization response ended." );
+}
+
+void SyncNextNeighborsBlockForest::generateSynchronizationMessages(data::ParticleStorage& ps,
+                                                                   const std::shared_ptr<blockforest::BlockForest>& bf,
+                                                                   const real_t dx) const
+{
+   const uint_t ownRank = uint_c(rank_);
+   std::array<bool, 3> periodic;
+   periodic[0] = bf->isPeriodic(0);
+   periodic[1] = bf->isPeriodic(1);
+   periodic[2] = bf->isPeriodic(2);
+   // Packs update/copy/removal/migration notifications for all local, communicating particles into bs.
+   WALBERLA_LOG_DETAIL( "Assembling of particle synchronization message starts..." );
+
+   // loop over all particles; pIt is advanced manually because particles may be removed inside the loop
+   for( auto pIt = ps.begin(); pIt != ps.end(); )
+   {
+      //skip all ghost particles
+      if (data::particle_flags::isSet( pIt->getFlags(), data::particle_flags::GHOST))
+      {
+         ++pIt;
+         continue;
+      }
+
+      //skip all particles that do not communicate (create ghost particles) on other processes
+      if (data::particle_flags::isSet( pIt->getFlags(), data::particle_flags::NON_COMMUNICATING))
+      {
+         ++pIt;
+         continue;
+      }
+
+      auto& currentBlock = pIt->getCurrentBlockRef();
+      WALBERLA_CHECK_NOT_NULLPTR(currentBlock);
+      if (isInsideAABB(pIt->getPosition(), pIt->getInteractionRadius() + dx, currentBlock->getAABB()))
+      {
+         //no sync needed
+         //just delete ghost particles if there are any
+
+         for (const auto& ghostOwner : pIt->getGhostOwners() )
+         {
+            auto& buffer( bs.sendBuffer(static_cast<walberla::mpi::MPIRank>(ghostOwner)) );
+
+            WALBERLA_LOG_DETAIL( "Sending removal notification for particle " << pIt->getUid() << " to process " << ghostOwner );
+
+            packNotification(buffer, ParticleRemovalNotification( *pIt ));
+         }
+
+         pIt->getGhostOwnersRef().clear();
+
+         ++pIt;
+         continue;
+      }
+
+      //correct position to make sure particle is always inside the domain!
+      //everything is decided by the master particle therefore ghost particles are not touched
+      if (!data::particle_flags::isSet( pIt->getFlags(), data::particle_flags::FIXED) &&
+          !data::particle_flags::isSet( pIt->getFlags(), data::particle_flags::GHOST)) // NOTE(review): ghosts were already skipped above; this check looks redundant
+      {
+         bf->mapToPeriodicDomain( pIt->getPositionRef() );
+      }
+
+      // Note: At this point we know that the particle was locally owned before the position update.
+      WALBERLA_CHECK_EQUAL(pIt->getOwner(), ownRank);
+
+      WALBERLA_LOG_DETAIL( "Processing local particle " << pIt->getUid() );
+
+      // Update nearest neighbor processes.
+      auto isInsideDomain = isInsideAABB(pIt->getPosition(), pIt->getInteractionRadius(), bf->getDomain());
+      std::vector<int> ranksAlreadyTreated{int_c(ownRank)}; // ranks that need no (further) notification for this particle; starts with the own rank
+      for( uint_t nb = uint_t(0); nb < currentBlock->getNeighborhoodSize(); ++nb )
+      {
+         auto nbProcessRank = currentBlock->getNeighborProcess(nb);
+         if (std::find(ranksAlreadyTreated.begin(), ranksAlreadyTreated.end(), int_c(nbProcessRank)) != ranksAlreadyTreated.end())
+         {
+            continue;
+         }
+         auto nbAABB = currentBlock->getNeighborAABB(nb);
+         auto sqDistance = isInsideDomain
+                           ? sqDistancePointToAABB(pIt->getPosition(), nbAABB)
+                           : sqDistancePointToAABBPeriodic(pIt->getPosition(), nbAABB, bf->getDomain(), periodic);
+         auto tmp = pIt->getInteractionRadius() + dx;
+         if( sqDistance <  tmp*tmp)
+         {
+            ranksAlreadyTreated.emplace_back(int_c(nbProcessRank));
+            auto ghostOwnerIt = std::find( pIt->getGhostOwners().begin(), pIt->getGhostOwners().end(), nbProcessRank );
+            if( ghostOwnerIt != pIt->getGhostOwners().end() )
+            {
+               // already a ghost there -> update
+               auto& buffer( bs.sendBuffer(nbProcessRank) );
+               WALBERLA_LOG_DETAIL( "Sending update notification for particle " << pIt->getUid() << " to process " << (nbProcessRank) );
+               packNotification(buffer, ParticleUpdateNotification( *pIt ));
+            } else
+            {
+               // no ghost there -> create ghost
+               auto& buffer( bs.sendBuffer(nbProcessRank) );
+               WALBERLA_LOG_DETAIL( "Sending shadow copy notification for particle " << pIt->getUid() << " to process " << (nbProcessRank) );
+               packNotification(buffer, ParticleCopyNotification( *pIt ));
+               pIt->getGhostOwnersRef().insert( int_c(nbProcessRank) );
+            }
+         }
+      }
+      for (auto ghostOwnerIt = pIt->getGhostOwnersRef().begin();
+           ghostOwnerIt != pIt->getGhostOwnersRef().end();
+           )
+      {
+         if (std::find(ranksAlreadyTreated.begin(),
+                       ranksAlreadyTreated.end(),
+                       int_c(*ghostOwnerIt)) == ranksAlreadyTreated.end())
+         {
+            // In case the particle is still registered on a remote process that was not treated in the
+            // neighbor loop above (it is no longer within reach of it), send a removal notification.
+            auto& buffer( bs.sendBuffer(*ghostOwnerIt) );
+
+            WALBERLA_LOG_DETAIL( "Sending removal notification for particle " << pIt->getUid() << " to process " << *ghostOwnerIt );
+
+            packNotification(buffer, ParticleRemovalNotification( *pIt ));
+
+            ghostOwnerIt = pIt->getGhostOwnersRef().erase(ghostOwnerIt);
+
+            continue;
+         }
+         ++ghostOwnerIt;
+      }
+
+      //particle has left subdomain?
+      if (currentBlock->getAABB().contains(pIt->getPosition()))
+      {
+         // particle still is locally owned after position update.
+         WALBERLA_LOG_DETAIL( "Owner of particle " << pIt->getUid() << " is still process " << pIt->getOwner() );
+      } else
+      {
+         //find new owner (no early exit: the last neighbor block containing the position wins)
+         int ownerRank = -1;
+         for( uint_t i = uint_t(0); i != currentBlock->getNeighborhoodSize(); ++i )
+         {
+            if (currentBlock->getNeighborAABB(i).contains(pIt->getPosition()))
+            {
+               ownerRank = int_c(currentBlock->getNeighborProcess(i));
+            }
+         }
+
+         if( ownerRank != int_c(ownRank) ) // NOTE(review): nothing happens here if the containing neighbor block is on this process - confirm the current block is updated elsewhere
+         {
+            WALBERLA_LOG_DETAIL( "Local particle " << pIt->getUid() << " is no longer on process " << ownRank << " but on process " << ownerRank );
+
+            if( ownerRank < 0 )
+            {
+               // No owner found: Outflow condition.
+               WALBERLA_LOG_DETAIL( "Sending deletion notifications for particle " << pIt->getUid() << " due to outflow." );
+
+               // Registered processes are expected to receive their removal notification inside removeAndNotify().
+               pIt = removeAndNotify( bs, ps, pIt );
+
+               continue;
+            }
+
+            WALBERLA_LOG_DETAIL( "Sending migration notification for particle " << pIt->getUid() << " to process " << ownerRank << "." );
+            //WALBERLA_LOG_DETAIL( "Process registration list before migration: " << pIt->getGhostOwners() );
+
+            // Set new owner and transform to ghost particle
+            pIt->setOwner(ownerRank);
+            data::particle_flags::set( pIt->getFlagsRef(), data::particle_flags::GHOST );
+
+            // the position is currently mapped periodically into the global domain,
+            // this might not be the correct position for a ghost particle
+            correctParticlePosition( pIt->getPositionRef(), currentBlock->getAABB().center(), bf->getDomain(), periodic );
+
+            // Correct registration list (exclude new owner and us - the old owner) and
+            // notify registered processes (except for new owner) of (remote) migration since they possess a ghost particle.
+            auto ownerIt = std::find( pIt->getGhostOwners().begin(), pIt->getGhostOwners().end(), ownerRank );
+            WALBERLA_CHECK_UNEQUAL(ownerIt, pIt->getGhostOwners().end(), "New owner has to be former ghost owner!" );
+
+            pIt->getGhostOwnersRef().erase( ownerIt );
+
+            for( auto ghostRank : pIt->getGhostOwners() )
+            {
+               auto& buffer( bs.sendBuffer(static_cast<walberla::mpi::MPIRank>(ghostRank)) );
+
+               WALBERLA_LOG_DETAIL( "Sending remote migration notification for particle " << pIt->getUid() <<
+                                    " to process " << ghostRank );
+
+               packNotification(buffer, ParticleRemoteMigrationNotification( *pIt, ownerRank ));
+            }
+
+            pIt->getGhostOwnersRef().insert( int_c(ownRank) ); // presumably so that the migration notification lists this process as ghost owner - confirm
+
+            // Send migration notification to new owner
+            auto& buffer( bs.sendBuffer(ownerRank) );
+            packNotification(buffer, ParticleMigrationNotification( *pIt ));
+
+            pIt->getGhostOwnersRef().clear();
+
+            continue; // no ++pIt: the particle now carries the GHOST flag and is skipped at the top of the loop
+         }
+      }
+
+      ++pIt;
+   }
+
+   WALBERLA_LOG_DETAIL( "Assembling of particle synchronization message ended." );
+}
+
+}  // namespace mpi
+}  // namespace mesa_pd
+}  // namespace walberla
diff --git a/src/mesa_pd/mpi/SyncNextNeighborsBlockForest.h b/src/mesa_pd/mpi/SyncNextNeighborsBlockForest.h
new file mode 100644
index 0000000000000000000000000000000000000000..02ad7557d8c236a216e62db7197920bcecedb65f
--- /dev/null
+++ b/src/mesa_pd/mpi/SyncNextNeighborsBlockForest.h
@@ -0,0 +1,81 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SyncNextNeighborsBlockForest.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/Flags.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/domain/BlockForestDomain.h>
+#include <mesa_pd/mpi/notifications/PackNotification.h>
+#include <mesa_pd/mpi/notifications/ParseMessage.h>
+#include <mesa_pd/mpi/notifications/ParticleCopyNotification.h>
+#include <mesa_pd/mpi/notifications/ParticleMigrationNotification.h>
+#include <mesa_pd/mpi/notifications/ParticleRemoteMigrationNotification.h>
+#include <mesa_pd/mpi/notifications/ParticleRemovalNotification.h>
+#include <mesa_pd/mpi/notifications/ParticleUpdateNotification.h>
+
+#include <blockforest/BlockForest.h>
+
+#include <core/mpi/BufferSystem.h>
+#include <core/logging/Logging.h>
+
+namespace walberla {
+namespace mesa_pd {
+namespace mpi {
+
+/**
+ * Kernel which updates all ghost particles by exchanging notifications with the processes of neighboring blocks.
+ *
+ * \ingroup mesa_pd_mpi
+ */
+class SyncNextNeighborsBlockForest
+{
+public:
+   void operator()(data::ParticleStorage& ps,
+                   const std::shared_ptr<blockforest::BlockForest>& blockforest,
+                   const std::shared_ptr<domain::BlockForestDomain>& domain,
+                   const real_t dx = real_t(0)) const; // dx is added to the interaction radius in the distance checks
+
+   int64_t getBytesSent() const { return bs.getBytesSent(); } // statistics below are forwarded from the internal buffer system
+   int64_t getBytesReceived() const { return bs.getBytesReceived(); }
+
+   int64_t getNumberOfSends() const { return bs.getNumberOfSends(); }
+   int64_t getNumberOfReceives() const { return bs.getNumberOfReceives(); }
+private:
+   void generateSynchronizationMessages(data::ParticleStorage& ps,
+                                        const std::shared_ptr<blockforest::BlockForest>& blockforest,
+                                        const real_t dx) const; // fills the send buffers of bs
+
+   mutable walberla::mpi::BufferSystem bs = walberla::mpi::BufferSystem( walberla::mpi::MPIManager::instance()->comm() ); // mutable: replaced and filled inside the const operator()
+
+   int numProcesses_ = walberla::mpi::MPIManager::instance()->numProcesses(); // operator() returns immediately if this is 1
+   int rank_         = walberla::mpi::MPIManager::instance()->rank(); // rank of this process
+};
+
+}  // namespace mpi
+}  // namespace mesa_pd
+}  // namespace walberla
diff --git a/src/pe/fcd/AnalyticCollisionDetection.h b/src/pe/fcd/AnalyticCollisionDetection.h
index 848134d4855e784acdb7065164e76dd9c46da6ba..2fbda8d98e6a67f689445706cc91d929f0b5959d 100644
--- a/src/pe/fcd/AnalyticCollisionDetection.h
+++ b/src/pe/fcd/AnalyticCollisionDetection.h
@@ -33,7 +33,6 @@
 #include "pe/utility/BodyCast.h"
 
 #include "core/debug/Debug.h"
-#include "core/math/RotationMatrix.h"
 #include "core/math/Shims.h"
 #include "geometry/GeometricalFunctions.h"
 
diff --git a/tests/mesa_pd/CMakeLists.txt b/tests/mesa_pd/CMakeLists.txt
index 6fbc2d0259ae524ff24fda49b1b00e2366ba3a18..5555ac9cfdae03f92baf272ed9c59564d6aed5d5 100644
--- a/tests/mesa_pd/CMakeLists.txt
+++ b/tests/mesa_pd/CMakeLists.txt
@@ -40,9 +40,15 @@ waLBerla_execute_test( NAME   MESA_PD_ContactDetection PROCESSES 8 )
 waLBerla_compile_test( NAME   MESA_PD_Data_Flags FILES data/Flags.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Data_Flags )
 
+waLBerla_compile_test( NAME   MESA_PD_Data_LinkedCells FILES data/LinkedCells.cpp DEPENDS core )
+waLBerla_execute_test( NAME   MESA_PD_Data_LinkedCells )
+
 waLBerla_compile_test( NAME   MESA_PD_Data_ParticleStorage FILES data/ParticleStorage.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Data_ParticleStorage )
 
+waLBerla_compile_test( NAME   MESA_PD_Data_SparseLinkedCells FILES data/SparseLinkedCells.cpp DEPENDS core )
+waLBerla_execute_test( NAME   MESA_PD_Data_SparseLinkedCells )
+
 waLBerla_compile_test( NAME   MESA_PD_Domain_BlockForestDomain FILES domain/BlockForestDomain.cpp DEPENDS blockforest core )
 waLBerla_execute_test( NAME   MESA_PD_Domain_BlockForestDomain )
 
@@ -140,6 +146,9 @@ waLBerla_execute_test( NAME   MESA_PD_Kernel_SyncGhostOwnersLarge PROCESSES 27 )
 waLBerla_compile_test( NAME   MESA_PD_Kernel_SyncNextNeighbors FILES kernel/SyncNextNeighbors.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Kernel_SyncNextNeighbors PROCESSES 27 )
 
+waLBerla_compile_test( NAME   MESA_PD_Kernel_SyncNextNeighborsBlockForest FILES kernel/SyncNextNeighborsBlockForest.cpp DEPENDS core )
+waLBerla_execute_test( NAME   MESA_PD_Kernel_SyncNextNeighborsBlockForest PROCESSES 27 )
+
 waLBerla_compile_test( NAME   MESA_PD_Kernel_TemperatureIntegration FILES kernel/TemperatureIntegration.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Kernel_TemperatureIntegration )
 
diff --git a/tests/mesa_pd/data/LinkedCells.cpp b/tests/mesa_pd/data/LinkedCells.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0653c116d62ada8102e033d472aa773b8aa2b8d5
--- /dev/null
+++ b/tests/mesa_pd/data/LinkedCells.cpp
@@ -0,0 +1,65 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   LinkedCells.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include <mesa_pd/data/LinkedCells.h>
+
+#include <core/Environment.h>
+#include <core/logging/Logging.h>
+
+#include <algorithm>
+#include <iostream>
+
+namespace walberla {
+namespace mesa_pd {
+
+// Checks the cell count and the cell index <-> cell coordinate mapping of data::LinkedCells.
+int main( int argc, char ** argv )
+{
+   Environment env(argc, argv);
+   WALBERLA_UNUSED(env);
+   mpi::MPIManager::instance()->useWorldComm();
+
+   // 4.5 x 5.5 x 6.5 box with a cell diameter of 1
+   const math::AABB box(real_t(0), real_t(0), real_t(0), real_t(4.5), real_t(5.5), real_t(6.5));
+   data::LinkedCells lc(box, real_t(1));
+
+   WALBERLA_CHECK_EQUAL(lc.numCellsPerDim_[0], 5);
+   WALBERLA_CHECK_EQUAL(lc.numCellsPerDim_[1], 6);
+   WALBERLA_CHECK_EQUAL(lc.numCellsPerDim_[2], 7);
+
+   // cell (2,1,3) is expected at linear index 3*(5*6) + 1*5 + 2
+   auto linearIdx = data::getCellIdx(lc, 2, 1, 3);
+   WALBERLA_CHECK_EQUAL(linearIdx, 90 + 5 + 2);
+   // ... and the linear index has to map back to the same coordinates
+   int64_t cx = 0, cy = 0, cz = 0;
+   data::getCellCoordinates(lc, linearIdx, cx, cy, cz);
+   WALBERLA_CHECK_EQUAL(cx, 2);
+   WALBERLA_CHECK_EQUAL(cy, 1);
+   WALBERLA_CHECK_EQUAL(cz, 3);
+   return EXIT_SUCCESS;
+}
+
+} //namespace mesa_pd
+} //namespace walberla
+
+int main( int argc, char ** argv )
+{
+   return walberla::mesa_pd::main(argc, argv);
+}
diff --git a/tests/mesa_pd/data/SparseLinkedCells.cpp b/tests/mesa_pd/data/SparseLinkedCells.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..090e739e2ce10b74c929dddf53f85a4b7115e5a7
--- /dev/null
+++ b/tests/mesa_pd/data/SparseLinkedCells.cpp
@@ -0,0 +1,65 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   SparseLinkedCells.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include <mesa_pd/data/SparseLinkedCells.h>
+
+#include <core/Environment.h>
+#include <core/logging/Logging.h>
+
+#include <algorithm>
+#include <iostream>
+
+namespace walberla {
+namespace mesa_pd {
+
+// Checks the cell count and the cell index <-> cell coordinate mapping of data::SparseLinkedCells.
+int main( int argc, char ** argv )
+{
+   Environment env(argc, argv);
+   WALBERLA_UNUSED(env);
+   mpi::MPIManager::instance()->useWorldComm();
+
+   // 4.5 x 5.5 x 6.5 box with a cell diameter of 1
+   const math::AABB box(real_t(0), real_t(0), real_t(0), real_t(4.5), real_t(5.5), real_t(6.5));
+   data::SparseLinkedCells lc(box, real_t(1));
+
+   WALBERLA_CHECK_EQUAL(lc.numCellsPerDim_[0], 5);
+   WALBERLA_CHECK_EQUAL(lc.numCellsPerDim_[1], 6);
+   WALBERLA_CHECK_EQUAL(lc.numCellsPerDim_[2], 7);
+
+   // cell (2,1,3) is expected at linear index 3*(5*6) + 1*5 + 2
+   auto linearIdx = data::getCellIdx(lc, 2, 1, 3);
+   WALBERLA_CHECK_EQUAL(linearIdx, 90 + 5 + 2);
+   // ... and the linear index has to map back to the same coordinates
+   int64_t cx = 0, cy = 0, cz = 0;
+   data::getCellCoordinates(lc, linearIdx, cx, cy, cz);
+   WALBERLA_CHECK_EQUAL(cx, 2);
+   WALBERLA_CHECK_EQUAL(cy, 1);
+   WALBERLA_CHECK_EQUAL(cz, 3);
+   return EXIT_SUCCESS;
+}
+
+} //namespace mesa_pd
+} //namespace walberla
+
+int main( int argc, char ** argv )
+{
+   return walberla::mesa_pd::main(argc, argv);
+}
diff --git a/tests/mesa_pd/kernel/DetectAndStoreContacts.cpp b/tests/mesa_pd/kernel/DetectAndStoreContacts.cpp
index 6fba0bba4a46153e8071ee1a8381be2b107498ac..25b192a539cf338a6de7b987cb748857393670b0 100644
--- a/tests/mesa_pd/kernel/DetectAndStoreContacts.cpp
+++ b/tests/mesa_pd/kernel/DetectAndStoreContacts.cpp
@@ -36,8 +36,7 @@
 #include <iostream>
 
 namespace walberla {
-
-using namespace walberla::mesa_pd;
+namespace mesa_pd {
 
 class ParticleAccessorWithShape : public data::ParticleAccessor
 {
@@ -120,9 +119,10 @@ int main( int argc, char ** argv )
    return EXIT_SUCCESS;
 }
 
+} //namespace mesa_pd
 } //namespace walberla
 
 int main( int argc, char ** argv )
 {
-   return walberla::main(argc, argv);
+   return walberla::mesa_pd::main(argc, argv);
 }
diff --git a/tests/mesa_pd/kernel/GenerateLinkedCells.cpp b/tests/mesa_pd/kernel/GenerateLinkedCells.cpp
index ef4724b5e057345b8007286c18ed603b1e071802..c54473cb0af6fb7a848ba5f829abaef5f6816874 100644
--- a/tests/mesa_pd/kernel/GenerateLinkedCells.cpp
+++ b/tests/mesa_pd/kernel/GenerateLinkedCells.cpp
@@ -80,9 +80,9 @@ int main( int argc, char ** argv )
          for (int y = 0; y < linkedCells.numCellsPerDim_[1]; ++y)
             for (int z = 0; z < linkedCells.numCellsPerDim_[2]; ++z)
             {
-               const int cell_idx = getCellIdx(linkedCells, x, y, z);
+               const uint_t cell_idx = getCellIdx(linkedCells, x, y, z);
                auto aabb = getCellAABB(linkedCells, x, y, z);
-               int p_idx = linkedCells.cells_[uint_c(cell_idx)];
+               int p_idx = linkedCells.cells_[cell_idx];
                while (p_idx != -1)
                {
                   ++particleCounter;
diff --git a/tests/mesa_pd/kernel/HCSITSKernels.cpp b/tests/mesa_pd/kernel/HCSITSKernels.cpp
index 2977c398d114905ad0623fd4057b6ba069179837..c3cb9fd6d800cb94e0e879a7b8ac5bd81321180b 100644
--- a/tests/mesa_pd/kernel/HCSITSKernels.cpp
+++ b/tests/mesa_pd/kernel/HCSITSKernels.cpp
@@ -50,90 +50,89 @@
 #include <iostream>
 
 namespace walberla {
+namespace mesa_pd {
 
-using namespace walberla::mesa_pd;
-
-   class ParticleAccessorWithShape : public data::ParticleAccessor
-   {
-      public:
-      ParticleAccessorWithShape(std::shared_ptr<data::ParticleStorage>& ps, std::shared_ptr<data::ShapeStorage>& ss)
-              : ParticleAccessor(ps)
-              , ss_(ss)
-      {}
-
-      const real_t& getMass(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getMass();}
-      const real_t& getInvMass(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInvMass();}
-
-      const Mat3& getInertia(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInertiaBF();}
-      const Mat3& getInvInertia(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInvInertiaBF();}
-
-      data::BaseShape* getShape(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)].get();}
-      private:
-      std::shared_ptr<data::ShapeStorage> ss_;
-   };
-
-   template<typename PStorage, typename CStorage, typename PAccessor, typename CAccessor>
-   class TestHCSITSKernel {
-      public:
-      TestHCSITSKernel(PStorage &ps_, CStorage &cs_, PAccessor &pa_, CAccessor &ca_) : ps(ps_), cs(cs_), pa(pa_), ca(ca_),
-            erp(real_t(1.0)), model(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticFrictionlessContact), contactThreshold(0), globalAcc(0) {}
-
-      void operator()(real_t dt){
-         // Perform Collision detection (call kernel, that stores contacts into cs)
-         kernel::DetectAndStoreContacts detectAndStore(cs);
-         cs.clear();
-
-         domain::InfiniteDomain domain;
-         collision_detection::AnalyticContactDetection acd;
-         acd.getContactThreshold() = contactThreshold;
-         ps.forEachParticlePairHalf(false, kernel::ExcludeInfiniteInfinite(), pa, detectAndStore, pa, domain, acd);
-
-         // Create Kernels
-         kernel::InitContactsForHCSITS initContacts(1);
-         initContacts.setFriction(0,0,real_t(0.2));
-         initContacts.setErp(real_t(erp));
-
-         kernel::InitParticlesForHCSITS initParticles;
-         initParticles.setGlobalAcceleration(globalAcc);
-
-         kernel::HCSITSRelaxationStep relaxationStep;
-         relaxationStep.setRelaxationModel(model);
-         relaxationStep.setCor(real_t(0.6)); // Only effective for PGSM
-
-         kernel::IntegrateParticlesHCSITS integration;
-
-         mesa_pd::mpi::ReduceProperty reductionKernel;
-         mesa_pd::mpi::BroadcastProperty broadcastKernel;
-
-         // Run the HCSITS loop
-         cs.forEachContact(false, kernel::SelectAll(), ca, initContacts, ca, pa);
-         ps.forEachParticle(false, kernel::SelectAll(), pa, initParticles, pa, dt);
-
-         VelocityUpdateNotification::Parameters::relaxationParam = real_t(1.0);
+class ParticleAccessorWithShape : public data::ParticleAccessor // particle accessor extended by shape dependent getters
+{
+public:
+   ParticleAccessorWithShape(std::shared_ptr<data::ParticleStorage>& ps, std::shared_ptr<data::ShapeStorage>& ss)
+      : ParticleAccessor(ps)
+      , ss_(ss)
+   {}
+   // all getters below look up the shape in ss_ via the shape ID stored for particle p_idx
+   const real_t& getMass(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getMass();}
+   const real_t& getInvMass(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInvMass();}
+
+   const Mat3& getInertia(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInertiaBF();}
+   const Mat3& getInvInertia(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInvInertiaBF();}
+
+   data::BaseShape* getShape(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)].get();} // non-owning pointer into ss_
+private:
+   std::shared_ptr<data::ShapeStorage> ss_; // shape storage the particles' shape IDs index into
+};
+
+template<typename PStorage, typename CStorage, typename PAccessor, typename CAccessor>
+class TestHCSITSKernel {
+public:
+   TestHCSITSKernel(PStorage &ps_, CStorage &cs_, PAccessor &pa_, CAccessor &ca_) : ps(ps_), cs(cs_), pa(pa_), ca(ca_),
+      erp(real_t(1.0)), model(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticFrictionlessContact), contactThreshold(0), globalAcc(0) {}
+
+   void operator()(real_t dt){
+      // Perform Collision detection (call kernel, that stores contacts into cs)
+      kernel::DetectAndStoreContacts detectAndStore(cs);
+      cs.clear();
+
+      domain::InfiniteDomain domain;
+      collision_detection::AnalyticContactDetection acd;
+      acd.getContactThreshold() = contactThreshold;
+      ps.forEachParticlePairHalf(false, kernel::ExcludeInfiniteInfinite(), pa, detectAndStore, pa, domain, acd);
+
+      // Create Kernels
+      kernel::InitContactsForHCSITS initContacts(1);
+      initContacts.setFriction(0,0,real_t(0.2));
+      initContacts.setErp(real_t(erp));
+
+      kernel::InitParticlesForHCSITS initParticles;
+      initParticles.setGlobalAcceleration(globalAcc);
+
+      kernel::HCSITSRelaxationStep relaxationStep;
+      relaxationStep.setRelaxationModel(model);
+      relaxationStep.setCor(real_t(0.6)); // Only effective for PGSM
+
+      kernel::IntegrateParticlesHCSITS integration;
+
+      mesa_pd::mpi::ReduceProperty reductionKernel;
+      mesa_pd::mpi::BroadcastProperty broadcastKernel;
+
+      // Run the HCSITS loop
+      cs.forEachContact(false, kernel::SelectAll(), ca, initContacts, ca, pa);
+      ps.forEachParticle(false, kernel::SelectAll(), pa, initParticles, pa, dt);
+
+      VelocityUpdateNotification::Parameters::relaxationParam = real_t(1.0);
+      reductionKernel.operator()<VelocityCorrectionNotification>(ps);
+      broadcastKernel.operator()<VelocityUpdateNotification>(ps);
+
+      VelocityUpdateNotification::Parameters::relaxationParam = real_t(0.8);
+      for(int i = 0; i < 10; i++){
+         cs.forEachContact(false, kernel::SelectAll(), ca, relaxationStep, ca, pa, dt);
          reductionKernel.operator()<VelocityCorrectionNotification>(ps);
          broadcastKernel.operator()<VelocityUpdateNotification>(ps);
-
-         VelocityUpdateNotification::Parameters::relaxationParam = real_t(0.8);
-         for(int i = 0; i < 10; i++){
-            cs.forEachContact(false, kernel::SelectAll(), ca, relaxationStep, ca, pa, dt);
-            reductionKernel.operator()<VelocityCorrectionNotification>(ps);
-            broadcastKernel.operator()<VelocityUpdateNotification>(ps);
-         }
-         ps.forEachParticle(false, kernel::SelectAll(), pa, integration, pa, dt);
       }
+      ps.forEachParticle(false, kernel::SelectAll(), pa, integration, pa, dt);
+   }
 
-      private:
-         PStorage  &ps;
-         CStorage  &cs;
-         PAccessor &pa;
-         CAccessor &ca;
+private:
+   PStorage  &ps;
+   CStorage  &cs;
+   PAccessor &pa;
+   CAccessor &ca;
 
-      public:
-         real_t erp;
-         kernel::HCSITSRelaxationStep::RelaxationModel model;
-         real_t contactThreshold;
-         Vec3 globalAcc;
-   };
+public:
+   real_t erp;
+   kernel::HCSITSRelaxationStep::RelaxationModel model;
+   real_t contactThreshold;
+   Vec3 globalAcc;
+};
 
 
 void normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel model)
@@ -176,11 +175,11 @@ void normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel model)
    testHCSITS.model = model;
 
    WALBERLA_LOG_INFO(paccessor.getInvMass(0))
-   WALBERLA_LOG_INFO(paccessor.getInvMass(1))
+         WALBERLA_LOG_INFO(paccessor.getInvMass(1))
 
-   // plane at 5,5,5
-   // radius 1.1
-   p->setPosition(  Vec3(5,5,6) );
+         // plane at 5,5,5
+         // radius 1.1
+         p->setPosition(  Vec3(5,5,6) );
    p->setLinearVelocity( Vec3(0,0,0) );
    testHCSITS( real_c( real_t(1.0) ) );
    WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.1)) );
@@ -268,48 +267,48 @@ void normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel model)
  * */
 void SphereSphereTest(kernel::HCSITSRelaxationStep::RelaxationModel model){
 
-      //init data structures
-      auto ps = std::make_shared<data::ParticleStorage>(100);
-      auto cs = std::make_shared<data::ContactStorage>(100);
-      auto ss = std::make_shared<data::ShapeStorage>();
-      ParticleAccessorWithShape paccessor(ps, ss);
-      data::ContactAccessor caccessor(cs);
-      auto density = real_t(7.874);
-      auto radius = real_t(1.1);
-
-      auto smallSphere = ss->create<data::Sphere>( radius );
-      ss->shapes[smallSphere]->updateMassAndInertia( density );
-
-      auto dt = real_t(1);
-
-      // Create two slightly overlapping spheres in a row (located at x=0,2)
-      auto p = ps->create();
-      p->getPositionRef()          = Vec3(real_t(0), real_t(0), real_t(0));
-      p->getShapeIDRef()           = smallSphere;
-      p->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
-      p->getLinearVelocityRef()    = Vec3(real_t(1), real_t(0), real_t(0));
-      p->getTypeRef()              = 0;
-      auto p2 = ps->create();
-      p2->getPositionRef()          = Vec3(real_t(2), real_t(0), real_t(0));
-      p2->getShapeIDRef()           = smallSphere;
-      p2->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
-      p2->getLinearVelocityRef() = Vec3(real_t(-1), real_t(0), real_t(0));
-      p2->getTypeRef()              = 0;
-      TestHCSITSKernel<data::ParticleStorage, data::ContactStorage, ParticleAccessorWithShape, data::ContactAccessor> testHCSITS(*ps, *cs, paccessor, caccessor);
-      testHCSITS.model = model;
-      testHCSITS(dt);
+   // set up particle, contact and shape storages together with their accessors
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   auto cs = std::make_shared<data::ContactStorage>(100);
+   auto ss = std::make_shared<data::ShapeStorage>();
+   ParticleAccessorWithShape paccessor(ps, ss);
+   data::ContactAccessor caccessor(cs);
+   auto density = real_t(7.874);
+   auto radius = real_t(1.1);
 
-      WALBERLA_CHECK_FLOAT_EQUAL(p->getPosition(), Vec3(real_t(-0.1),0,0));
-      WALBERLA_CHECK_FLOAT_EQUAL(p->getLinearVelocity(), Vec3(real_t(-0.1),0,0));
-      WALBERLA_CHECK_FLOAT_EQUAL(p->getAngularVelocity(), Vec3(0,0,0));
-      WALBERLA_CHECK_FLOAT_EQUAL(p2->getPosition(), Vec3(real_t(2.1),0,0));
-      WALBERLA_CHECK_FLOAT_EQUAL(p2->getLinearVelocity(), Vec3(real_t(0.1),0,0))
-      WALBERLA_CHECK_FLOAT_EQUAL(p2->getAngularVelocity(), Vec3(0,0,0));
+   auto smallSphere = ss->create<data::Sphere>( radius );
+   ss->shapes[smallSphere]->updateMassAndInertia( density );
 
-      WALBERLA_LOG_INFO(p->getPosition());
-      WALBERLA_LOG_INFO(p->getLinearVelocity());
-      WALBERLA_LOG_INFO(p2->getPosition());
-      WALBERLA_LOG_INFO(p2->getLinearVelocity());
+   auto dt = real_t(1);
+
+   // Create two slightly overlapping spheres in a row (centres at x=0 and x=2, radius 1.1 each => overlap 0.2)
+   auto p = ps->create();
+   p->getPositionRef()          = Vec3(real_t(0), real_t(0), real_t(0));
+   p->getShapeIDRef()           = smallSphere;
+   p->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p->getLinearVelocityRef()    = Vec3(real_t(1), real_t(0), real_t(0));
+   p->getTypeRef()              = 0;
+   auto p2 = ps->create();
+   p2->getPositionRef()          = Vec3(real_t(2), real_t(0), real_t(0));
+   p2->getShapeIDRef()           = smallSphere;
+   p2->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p2->getLinearVelocityRef() = Vec3(real_t(-1), real_t(0), real_t(0));
+   p2->getTypeRef()              = 0;
+   TestHCSITSKernel<data::ParticleStorage, data::ContactStorage, ParticleAccessorWithShape, data::ContactAccessor> testHCSITS(*ps, *cs, paccessor, caccessor);
+   testHCSITS.model = model;
+   testHCSITS(dt); // run the test kernel once with time step dt
+
+   WALBERLA_CHECK_FLOAT_EQUAL(p->getPosition(), Vec3(real_t(-0.1),0,0)); // expected: centres 2.2 (= 2*radius) apart, velocities pointing away from each other
+   WALBERLA_CHECK_FLOAT_EQUAL(p->getLinearVelocity(), Vec3(real_t(-0.1),0,0));
+   WALBERLA_CHECK_FLOAT_EQUAL(p->getAngularVelocity(), Vec3(0,0,0));
+   WALBERLA_CHECK_FLOAT_EQUAL(p2->getPosition(), Vec3(real_t(2.1),0,0));
+   WALBERLA_CHECK_FLOAT_EQUAL(p2->getLinearVelocity(), Vec3(real_t(0.1),0,0));
+   WALBERLA_CHECK_FLOAT_EQUAL(p2->getAngularVelocity(), Vec3(0,0,0));
+
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+   WALBERLA_LOG_INFO(p2->getPosition());
+   WALBERLA_LOG_INFO(p2->getLinearVelocity());
 }
 
 /**
@@ -477,9 +476,10 @@ int main( int argc, char ** argv )
    return EXIT_SUCCESS;
 }
 
+} //namespace mesa_pd
 } //namespace walberla
 
 int main( int argc, char ** argv )
 {
-   return walberla::main(argc, argv);
+   return walberla::mesa_pd::main(argc, argv); // the test driver is now defined inside namespace walberla::mesa_pd
 }
diff --git a/tests/mesa_pd/kernel/HeatConduction.cpp b/tests/mesa_pd/kernel/HeatConduction.cpp
index 466ae1f582b972d69418a769ca4ed554b315805e..9c8c4cd9e29419d051ef4b78f0b99c3b53c2a445 100644
--- a/tests/mesa_pd/kernel/HeatConduction.cpp
+++ b/tests/mesa_pd/kernel/HeatConduction.cpp
@@ -28,8 +28,7 @@
 #include <iostream>
 
 namespace walberla {
-
-using namespace walberla::mesa_pd;
+namespace mesa_pd {
 
 int main( int argc, char ** argv )
 {
@@ -71,9 +70,10 @@ int main( int argc, char ** argv )
    return EXIT_SUCCESS;
 }
 
+} //namespace mesa_pd
 } //namespace walberla
 
 int main( int argc, char ** argv )
 {
-   return walberla::main(argc, argv);
+   return walberla::mesa_pd::main(argc, argv); // the test driver is now defined inside namespace walberla::mesa_pd
 }
diff --git a/tests/mesa_pd/kernel/SyncNextNeighbors.cpp b/tests/mesa_pd/kernel/SyncNextNeighbors.cpp
index 93a7d6768ed2fb294c556cae0a6697d17c802d33..c6de32e05bc9cd98ffe189614ec9c9b58735f1e6 100644
--- a/tests/mesa_pd/kernel/SyncNextNeighbors.cpp
+++ b/tests/mesa_pd/kernel/SyncNextNeighbors.cpp
@@ -70,8 +70,15 @@ int main( int argc, char ** argv )
    //init domain partitioning
    auto forest = blockforest::createBlockForest( AABB(-15,-15,-15,15,15,15), // simulation domain
                                                  Vector3<uint_t>(3,3,3), // blocks in each direction
-                                                 Vector3<bool>(true, true, true) // periodicity
+                                                 Vector3<bool>(true, true, true), // periodicity
+                                                 27, // number of processes
+                                                 1 // initial refinement
                                                  );
+   math::AABB localDomain = forest->begin()->getAABB();
+   for (auto& iBlk : *forest)
+   {
+      localDomain.merge(iBlk.getAABB());
+   }
    domain::BlockForestDomain domain(forest);
    std::array< bool, 3 > periodic;
    periodic[0] = forest->isPeriodic(0);
@@ -118,10 +125,10 @@ int main( int argc, char ** argv )
       SNN(ps, domain);
 
       //check
-      if (sqDistancePointToAABBPeriodic(pos, forest->begin()->getAABB(), forest->getDomain(), periodic) <= radius * radius)
+      if (sqDistancePointToAABBPeriodic(pos, localDomain, forest->getDomain(), periodic) <= radius * radius)
       {
          WALBERLA_CHECK_EQUAL(ps.size(), 1);
-         if (forest->begin()->getAABB().contains(pos))
+         if (localDomain.contains(pos))
          {
             WALBERLA_CHECK(!data::particle_flags::isSet(ps.begin()->getFlags(), data::particle_flags::GHOST));
          } else
diff --git a/tests/mesa_pd/kernel/SyncNextNeighborsBlockForest.cpp b/tests/mesa_pd/kernel/SyncNextNeighborsBlockForest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6dfde66aa5e42a212d406c1798a119f55e40ae86
--- /dev/null
+++ b/tests/mesa_pd/kernel/SyncNextNeighborsBlockForest.cpp
@@ -0,0 +1,162 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   SyncNextNeighborsBlockForest.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include <mesa_pd/data/ParticleAccessor.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/domain/BlockForestDomain.h>
+#include <mesa_pd/kernel/AssocToBlock.h>
+#include <mesa_pd/kernel/ParticleSelector.h>
+#include <mesa_pd/mpi/SyncNextNeighborsBlockForest.h>
+
+#include <blockforest/BlockForest.h>
+#include <blockforest/Initialization.h>
+#include <core/Environment.h>
+#include <core/logging/Logging.h>
+#include <core/mpi/Reduce.h>
+
+#include <iostream>
+#include <memory>
+
+namespace walberla {
+namespace mesa_pd {
+
+const real_t radius = real_t(1); // interaction radius assigned to the test sphere; also the distance threshold used in the presence check of main()
+
+walberla::id_t createSphere(data::ParticleStorage& ps, domain::IDomain& domain)
+{  // Creates one sphere at the origin on the rank whose subdomain contains (0,0,0) and returns the sum-reduced uid on every rank.
+   walberla::id_t uid = 0; // ranks that do not create the sphere contribute 0 to the reduction below
+   auto owned = domain.isContainedInProcessSubdomain( uint_c(walberla::mpi::MPIManager::instance()->rank()), Vec3(0,0,0) );
+   if (owned)
+   {
+      data::Particle&& p          = *ps.create();
+      p.getPositionRef()          = Vec3(0,0,0);
+      p.getInteractionRadiusRef() = radius;
+      p.getRotationRef()          = Rot3(Quat());
+      p.getLinearVelocityRef()    = Vec3(1,2,3);
+      p.getAngularVelocityRef()   = Vec3(4,5,6);
+      p.getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+      uid = p.getUid();
+      WALBERLA_LOG_DETAIL("SPHERE CREATED");
+   }
+
+   walberla::mpi::allReduceInplace(uid, walberla::mpi::SUM); // equals the sphere's uid only if exactly one rank created it -- presumably guaranteed by the domain partitioning, TODO confirm
+   return uid;
+}
+
+int main( int argc, char ** argv )
+{  // Test driver: moves one sphere along the (1,-1,1) diagonal and, after every sync, checks particle presence and the GHOST flag on this process.
+   Environment env(argc, argv);
+   WALBERLA_UNUSED(env);
+   walberla::mpi::MPIManager::instance()->useWorldComm();
+
+   //logging::Logging::instance()->setStreamLogLevel(logging::Logging::DETAIL);
+//   logging::Logging::instance()->includeLoggingToFile("MESA_PD_Kernel_SyncNextNeighbor");
+//   logging::Logging::instance()->setFileLogLevel(logging::Logging::DETAIL);
+
+   //init domain partitioning: fully periodic domain [-15,15]^3 with 3x3x3 root blocks
+   auto forest = blockforest::createBlockForest( AABB(-15,-15,-15,15,15,15), // simulation domain
+                                                 Vector3<uint_t>(3,3,3), // blocks in each direction
+                                                 Vector3<bool>(true, true, true), // periodicity
+                                                 27, // number of processes
+                                                 1 // initial refinement
+                                                 );
+   //checking blocks distribution: forest->size() must be 8 (presumably the number of process-local blocks -- TODO confirm)
+   WALBERLA_CHECK_EQUAL(forest->size(), 8);
+
+   auto domain = std::make_shared<domain::BlockForestDomain>(forest);
+   std::array< bool, 3 > periodic;
+   periodic[0] = forest->isPeriodic(0);
+   periodic[1] = forest->isPeriodic(1);
+   periodic[2] = forest->isPeriodic(2);
+
+   //init data structures: particle storage and its accessor
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   data::ParticleAccessor ac(ps);
+
+   //initialize particle: createSphere all-reduces the uid, so every rank gets the same value
+   auto uid = createSphere(*ps, *domain);
+   WALBERLA_LOG_DEVEL_ON_ROOT("uid: " << uid);
+
+   //init kernels
+   kernel::AssocToBlock   assoc(forest);
+   mpi::SyncNextNeighborsBlockForest SNN;
+
+   std::vector<real_t> deltas { real_t(0.1), // scale factors applied to the direction (1,-1,1) to obtain the test positions below
+            real_t(4.9),
+            real_t(5.1),
+            real_t(9.9),
+            real_t(10.1),
+            real_t(14.9),
+            real_t(-14.9),
+            real_t(-10.1),
+            real_t(-9.9),
+            real_t(-5.1),
+            real_t(-4.9),
+            real_t(-0.1)};
+
+   for (auto delta : deltas)
+   {
+      WALBERLA_LOG_DEVEL_ON_ROOT(delta);
+
+      ps->forEachParticle(false, kernel::SelectLocal(), ac, assoc, ac); // apply the AssocToBlock kernel to local particles before the particle is moved
+
+      auto pos = Vec3(1,-1,1) * delta;
+      WALBERLA_LOG_DETAIL("checking position: " << pos);
+      // owner moves particle to new position (copies flagged GHOST are not moved here)
+      auto pIt = ps->find(uid);
+      if (pIt != ps->end())
+      {
+         if (!data::particle_flags::isSet(pIt->getFlags(), data::particle_flags::GHOST))
+         {
+            pIt->setPosition(pos);
+         }
+      }
+
+      //sync
+      SNN(*ps, forest, domain);
+
+      //check: exactly one particle is expected iff pos lies within 'radius' (periodic distance) of the union of local AABBs, otherwise none
+      if (sqDistancePointToAABBPeriodic(pos, domain->getUnionOfLocalAABBs(), forest->getDomain(), periodic) <= radius * radius)
+      {
+         WALBERLA_CHECK_EQUAL(ps->size(), 1);
+         if (domain->getUnionOfLocalAABBs().contains(pos))
+         {
+            WALBERLA_CHECK(!data::particle_flags::isSet(ps->begin()->getFlags(), data::particle_flags::GHOST)); // pos inside the local AABB union: must not be flagged GHOST
+         } else
+         {
+            WALBERLA_CHECK(data::particle_flags::isSet(ps->begin()->getFlags(), data::particle_flags::GHOST)); // pos outside but within radius: must be flagged GHOST
+         }
+      } else
+      {
+         WALBERLA_CHECK_EQUAL(ps->size(), 0);
+      }
+   }
+
+
+   return EXIT_SUCCESS;
+}
+
+} //namespace mesa_pd
+} //namespace walberla
+
+int main( int argc, char ** argv )
+{
+   return walberla::mesa_pd::main(argc, argv); // forward to the test driver defined in namespace walberla::mesa_pd
+}