diff --git a/src/core/timing/TimingNode.h b/src/core/timing/TimingNode.h
index 2f0597f7a591b12907932942e76f107b4f90015d..6f880d7e457919a74a33ada2a53cbd2e19220ea3 100644
--- a/src/core/timing/TimingNode.h
+++ b/src/core/timing/TimingNode.h
@@ -111,7 +111,7 @@ void TimingNode<TP>::swap(TimingNode<TP>& tt)
     }
 }
 
-/// Finds the spezified timer in the timing hierarchy
+/// Finds the specified timer in the timing hierarchy
 /// \param name timer name which may include more than one hierarchy separated by "."
 /// \code findTimer(tn, "firstLevel.secondLevel.thirdLevel.timerName"); \endcode
 /// \relates TimingNode
@@ -130,6 +130,31 @@ const Timer<TP>& findTimer( const TimingNode<TP>& tn, const std::string& name)
    }
 }
 
+/// Checks if the specified timer exists in the timing hierarchy
+/// \param name timer name which may include more than one hierarchy separated by "."
+/// \code timerExists(tn, "firstLevel.secondLevel.thirdLevel.timerName"); \endcode
+/// \relates TimingNode
+template< typename TP >  // Timing policy
+bool timerExists( const TimingNode<TP>& tn, const std::string& name )
+{
+   auto pos = name.find_first_of(".");
+   if (pos != std::string::npos)
+   {
+      if( tn.tree_.find(name.substr(0, pos)) != tn.tree_.end() )
+      {
+         return timerExists( tn.tree_.at(name.substr(0, pos)), name.substr(pos+1, std::string::npos));
+      }
+      else
+      {
+         return false;
+      }
+   }
+   else
+   {
+      return tn.tree_.find(name) != tn.tree_.end();
+   }
+}
+
 /// Resets the timer in the TimingNode structure and all sub timers
 /// \relates TimingNode
 template< typename TP >  // Timing policy
diff --git a/src/core/timing/TimingTree.h b/src/core/timing/TimingTree.h
index f34fc7f8845dd0e96c91ceb8038aafc5c4fb9f10..7e1e59c5bde590d14dc0b26a0f3bfe0a26a07907 100644
--- a/src/core/timing/TimingTree.h
+++ b/src/core/timing/TimingTree.h
@@ -59,13 +59,13 @@ public:
 
    void swap(TimingTree<TP>& tt);
 
-   /// Starts a timer beneath the current hirarchy level
+   /// Starts a timer beneath the current hierarchy level
    void start(const std::string& name);
-   /// Stops the last started timer and jumps back one hirarchy level
+   /// Stops the last started timer and jumps back one hierarchy level
    void stop(const std::string& name);
    /// Checks if specified timer is currently running.
    bool isTimerRunning(const std::string& name) const;
-   /// Resets the the timing hirarchy
+   /// Resets the the timing hierarchy
    void reset();
 
    //** Reduction ******************************************************************************************************
@@ -81,7 +81,9 @@ public:
 
    /// Returns the raw tree data structure
    const TimingNode<TP>& getRawData() const;
+
    const Timer<TP>& operator[](const std::string& name) const;
+   inline bool timerExists ( const std::string & n ) const;
 
    /// Returns the name of the currently running timer
    /// Might be expensive due to value search.
@@ -89,7 +91,7 @@ public:
 private:
    /// Tree data structure
    TimingNode<TP>  root_;
-   /// Pointer to the current hirarchy level.
+   /// Pointer to the current hierarchy level.
    TimingNode<TP>* current_;
 };
 
@@ -204,7 +206,7 @@ const TimingNode<TP>& TimingTree<TP>::getRawData() const
    return root_;
 }
 
-/// Finds the spezified timer in the timing hierarchy
+/// Finds the specified timer in the timing hierarchy
 /// \param name timer name which may include more than one hierarchy separated by "."
 /// \code tt["firstLevel.secondLevel.thirdLevel.timerName"].total(); \endcode
 template< typename TP >  // Timing policy
@@ -213,6 +215,15 @@ const Timer<TP>& TimingTree<TP>::operator[](const std::string& name) const
    return findTimer(root_, name);
 }
 
+/// Checks if the specified timer exists in the timing hierarchy
+/// \param name timer name which may include more than one hierarchy separated by "."
+/// \code tt.timerExists("firstLevel.secondLevel.thirdLevel.timerName"); \endcode
+template< typename TP >  // Timing policy
+bool TimingTree<TP>::timerExists(const std::string& name) const
+{
+   return walberla::timing::timerExists(root_, name);
+}
+
 template< typename TP >  // Timing policy
 std::string TimingTree<TP>::getCurrentTimerName() const
 {
diff --git a/src/field/allocation/FieldAllocator.h b/src/field/allocation/FieldAllocator.h
index 4ab58c0fe1d82195f28a64627d4577fdd9db3336..418fb6c0bd6d520f4a6a213a957000c27da23dc7 100644
--- a/src/field/allocation/FieldAllocator.h
+++ b/src/field/allocation/FieldAllocator.h
@@ -242,7 +242,7 @@ namespace field {
 
          virtual T * allocateMemory (  uint_t size )
          {
-            void * ptr = aligned_malloc_with_offset(size * sizeof(T), alignment, offset_ % alignment );
+            void * ptr = aligned_malloc_with_offset(size * sizeof(T) + alignment, alignment, offset_ % alignment );
             if(!ptr)
                throw std::bad_alloc();
 
diff --git a/src/field/python/FieldExport.impl.h b/src/field/python/FieldExport.impl.h
index 4dc9d0c3b0444561945dcfca2f1ed64fa5566bbd..a092b4103ecabc0882777e4e297be736cda3ef35 100644
--- a/src/field/python/FieldExport.impl.h
+++ b/src/field/python/FieldExport.impl.h
@@ -490,6 +490,65 @@ namespace internal {
    }
 
 
+   //===================================================================================================================
+   //
+   //  Aligned Allocation
+   //
+   //===================================================================================================================
+
+   template<typename T>
+   shared_ptr<field::FieldAllocator<T> > getAllocator(uint_t alignment)
+   {
+      if( alignment == 0 )
+         return shared_ptr<field::FieldAllocator<T> >(); // leave to default - auto-detection of alignment
+      else if ( alignment == 16 )
+         return make_shared< field::AllocateAligned<T, 16> >();
+      else if ( alignment == 32 )
+         return make_shared< field::AllocateAligned<T, 32> >();
+      else if ( alignment == 64 )
+         return make_shared< field::AllocateAligned<T, 64> >();
+      else if ( alignment == 128 )
+         return make_shared< field::AllocateAligned<T, 128> >();
+      else {
+         PyErr_SetString( PyExc_ValueError, "Alignment parameter has to be one of 0, 16, 32, 64, 128." );
+         throw boost::python::error_already_set();
+         return shared_ptr<field::FieldAllocator<T> >();
+      }
+   }
+
+   template< typename GhostLayerField_T >
+   class GhostLayerFieldDataHandling : public blockforest::AlwaysInitializeBlockDataHandling< GhostLayerField_T >
+   {
+   public:
+      typedef typename GhostLayerField_T::value_type Value_T;
+
+      GhostLayerFieldDataHandling( const weak_ptr<StructuredBlockStorage> &blocks, const uint_t nrOfGhostLayers,
+                                   const Value_T &initValue, const Layout layout, uint_t alignment = 0 ) :
+              blocks_( blocks ), nrOfGhostLayers_( nrOfGhostLayers ), initValue_( initValue ), layout_( layout ),
+              alignment_( alignment ) {}
+
+      GhostLayerField_T * initialize( IBlock * const block )
+      {
+         auto blocks = blocks_.lock();
+         WALBERLA_CHECK_NOT_NULLPTR( blocks, "Trying to access 'AlwaysInitializeBlockDataHandling' for a block "
+                                             "storage object that doesn't exist anymore" );
+         GhostLayerField_T * field = new GhostLayerField_T ( blocks->getNumberOfXCells( *block ),
+                                                             blocks->getNumberOfYCells( *block ),
+                                                             blocks->getNumberOfZCells( *block ),
+                                                             nrOfGhostLayers_, initValue_, layout_,
+                                                             getAllocator<Value_T>(alignment_) );
+         return field;
+      }
+
+   private:
+      weak_ptr< StructuredBlockStorage > blocks_;
+
+      uint_t  nrOfGhostLayers_;
+      Value_T initValue_;
+      Layout  layout_;
+      uint_t alignment_;
+   };
+
 
    //===================================================================================================================
    //
@@ -843,10 +902,10 @@ namespace internal {
    {
    public:
       CreateFieldExporter( uint_t xs, uint_t ys, uint_t zs, uint_t fs, uint_t gl,
-                           Layout layout, const boost::python::object & type,
-                           const shared_ptr<boost::python::object> & resultPointer )
+                           Layout layout, const boost::python::object & type, uint_t alignment,
+                           const shared_ptr<boost::python::object> & resultPointer  )
          : xs_( xs ), ys_(ys), zs_(zs), fs_(fs), gl_(gl),
-           layout_( layout),  type_( type ), resultPointer_( resultPointer )
+           layout_( layout),  type_( type ), alignment_(alignment), resultPointer_( resultPointer )
       {}
 
       template< typename FieldType>
@@ -862,7 +921,8 @@ namespace internal {
          if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() )  )
          {
             T initVal = T(); //extract<T> ( initValue_ );
-            *resultPointer_ = object( make_shared< GhostLayerField<T,F_SIZE> >( xs_,ys_,zs_, gl_, initVal, layout_ )  );
+            *resultPointer_ = object( make_shared< GhostLayerField<T,F_SIZE> >( xs_,ys_,zs_, gl_, initVal, layout_,
+                                                                                getAllocator<T>(alignment_)));
          }
       }
 
@@ -874,6 +934,7 @@ namespace internal {
       uint_t gl_;
       Layout layout_;
       boost::python::object type_;
+      uint_t alignment_;
       shared_ptr<boost::python::object> resultPointer_;
    };
 
@@ -881,7 +942,8 @@ namespace internal {
    boost::python::object createPythonField( boost::python::list size,
                                             boost::python::object type,
                                             uint_t ghostLayers,
-                                            Layout layout)
+                                            Layout layout,
+                                            uint_t alignment)
    {
       using namespace boost::python;
       uint_t xSize = extract<uint_t> ( size[0] );
@@ -898,7 +960,7 @@ namespace internal {
       }
 
       auto result = make_shared<boost::python::object>();
-      CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, result );
+      CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, alignment, result );
       python_coupling::for_each_noncopyable_type< FieldTypes >  ( exporter );
 
       if ( *result == object()  )
@@ -952,12 +1014,13 @@ namespace internal {
    class AddToStorageExporter
    {
    public:
-      AddToStorageExporter( const shared_ptr<StructuredBlockStorage> & blocks,
+      AddToStorageExporter(const shared_ptr<StructuredBlockStorage> & blocks,
                            const std::string & name, uint_t fs, uint_t gl, Layout layout,
                            const boost::python::object & type,
-                           const boost::python::object & initObj )
+                           const boost::python::object & initObj,
+                           uint_t alignment )
          : blocks_( blocks ), name_( name ), fs_( fs ),
-           gl_(gl),layout_( layout),  type_( type ), initObj_( initObj), found_(false)
+           gl_(gl),layout_( layout),  type_( type ), initObj_( initObj), alignment_(alignment), found_(false)
       {}
 
       template< typename FieldType>
@@ -972,11 +1035,15 @@ namespace internal {
 
          if( !found_ && python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() )  )
          {
-            if ( initObj_ == object() )
-               field::addToStorage< GhostLayerField<T,F_SIZE> >( blocks_, name_, T(), layout_, gl_ );
-            else
-               field::addToStorage< GhostLayerField<T,F_SIZE> >( blocks_, name_, extract<T>(initObj_), layout_, gl_ );
-
+            typedef internal::GhostLayerFieldDataHandling< GhostLayerField<T,F_SIZE > > DataHandling;
+            if ( initObj_ == object() ) {
+               auto dataHandling = walberla::make_shared< DataHandling >( blocks_, gl_, T(), layout_, alignment_ );
+               blocks_->addBlockData( dataHandling, name_ );
+            }
+            else {
+               auto dataHandling = walberla::make_shared< DataHandling >( blocks_, gl_, extract<T>(initObj_), layout_, alignment_ );
+               blocks_->addBlockData( dataHandling, name_ );
+            }
             found_ = true;
          }
       }
@@ -990,12 +1057,14 @@ namespace internal {
       Layout layout_;
       boost::python::object type_;
       boost::python::object initObj_;
+      uint_t alignment_;
       bool found_;
    };
 
    template<typename FieldTypes>
    void addToStorage( const shared_ptr<StructuredBlockStorage> & blocks, const std::string & name,
-                      boost::python::object type, uint_t fs, uint_t gl, Layout layout, boost::python::object initValue )
+                      boost::python::object type, uint_t fs, uint_t gl, Layout layout, boost::python::object initValue,
+                      uint_t alignment)
    {
       using namespace boost::python;
 
@@ -1005,7 +1074,7 @@ namespace internal {
       }
 
       auto result = make_shared<boost::python::object>();
-      AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, initValue );
+      AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, initValue, alignment );
       python_coupling::for_each_noncopyable_type< FieldTypes >  ( std::ref(exporter) );
 
       if ( ! exporter.successful() ) {
@@ -1252,7 +1321,8 @@ void exportFields()
    def( "createField", &internal::createPythonField<FieldTypes>, ( ( arg("size")                    ),
                                                                    ( arg("type")                    ),
                                                                    ( arg("ghostLayers") = uint_t(1) ),
-                                                                   ( arg("layout")      = zyxf      )   ) );
+                                                                   ( arg("layout")      = zyxf      ),
+                                                                   ( arg("alignment")   = 0         )) );
 
    def( "createFlagField", &internal::createPythonFlagField, ( ( arg("size")                      ),
                                                                ( arg("nrOfBits")    = uint_t(32)  ),
@@ -1264,7 +1334,8 @@ void exportFields()
                                                                   ( arg("fSize")       = 1         ),
                                                                   ( arg("ghostLayers") = uint_t(1) ),
                                                                   ( arg("layout")      = zyxf      ),
-                                                                  ( arg("initValue")   = object()  ) ) );
+                                                                  ( arg("initValue")   = object()  ),
+                                                                  ( arg("alignment")   = 0         ) ) );
 
    def( "addFlagFieldToStorage",&internal::addFlagFieldToStorage, ( ( arg("blocks")                  ),
                                                                     ( arg("name")                    ),
diff --git a/src/pe_coupling/utility/BodiesForceTorqueContainer.h b/src/pe_coupling/utility/BodiesForceTorqueContainer.h
index a32dc589d99d0a1138b4e5d1cab974338dc0933e..d69b6791b23734526521e10a3b42a6acb8b5f6bf 100644
--- a/src/pe_coupling/utility/BodiesForceTorqueContainer.h
+++ b/src/pe_coupling/utility/BodiesForceTorqueContainer.h
@@ -21,13 +21,13 @@
 
 #pragma once
 
+#include "blockforest/StructuredBlockForest.h"
 #include "core/math/Vector3.h"
-#include "domain_decomposition/StructuredBlockStorage.h"
 #include "pe/rigidbody/BodyIterators.h"
 #include "pe/synchronization/SyncForces.h"
 
 #include <map>
-#include <vector>
+#include <array>
 
 namespace walberla {
 namespace pe_coupling {
@@ -36,9 +36,14 @@ class BodiesForceTorqueContainer
 {  
 public:
 
-   BodiesForceTorqueContainer( const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & bodyStorageID )
-   : blockStorage_( blockStorage ), bodyStorageID_( bodyStorageID )
-     { }
+   typedef std::map< walberla::id_t, std::array<real_t,6> > ForceTorqueStorage_T;
+
+   BodiesForceTorqueContainer( const shared_ptr<StructuredBlockForest> & blockForest, const BlockDataID & bodyStorageID )
+   : blockForest_( blockForest ), bodyStorageID_( bodyStorageID )
+   {
+      // has to be added to the forest (not the storage) to register correctly
+      bodyForceTorqueStorageID_ = blockForest->addBlockData(make_shared<blockforest::AlwaysCreateBlockDataHandling<ForceTorqueStorage_T> >(), "BodiesForceTorqueContainer");
+   }
 
    void operator()()
    {
@@ -50,38 +55,18 @@ public:
       // clear map
       clear();
 
-      // sum up all forces/torques from shadow copies on local body (owner)
-      pe::reduceForces( blockStorage_->getBlockStorage(), bodyStorageID_ );
-      // send total forces/torques to shadow owners
-      pe::distributeForces( blockStorage_->getBlockStorage(), bodyStorageID_ );
-
       // (re-)build map
-      for( auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt )
+      for( auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt )
       {
+         auto bodyForceTorqueStorage = blockIt->getData<ForceTorqueStorage_T>(bodyForceTorqueStorageID_);
+
          for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
          {
-            auto & f = bodyForceTorqueMap_[ bodyIt->getSystemID() ];
+            auto & f = (*bodyForceTorqueStorage)[ bodyIt->getSystemID() ];
 
-            // only add if body has not been added already before (from another block)
-            if( f.empty() )
-            {
-               const auto & force = bodyIt->getForce();
-               f.push_back( force[0] );
-               f.push_back( force[1] );
-               f.push_back( force[2] );
-
-               const auto & torque = bodyIt->getTorque();
-               f.push_back( torque[0] );
-               f.push_back( torque[1] );
-               f.push_back( torque[2] );
-            }
-
-            // reset of force/torque on remote bodies necessary to erase the multiple occurrences of forces/torques on bodies
-            // (due to call to distributeForces() before)
-            if ( bodyIt->isRemote() )
-            {
-               bodyIt->resetForceAndTorque();
-            }
+            const auto & force = bodyIt->getForce();
+            const auto & torque = bodyIt->getTorque();
+            f = {{force[0], force[1], force[2], torque[0], torque[1], torque[2] }};
          }
       }
 
@@ -89,42 +74,52 @@ public:
 
    void setOnBodies()
    {
-      // owning process sets the force/torque on the bodies
-      for( auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt )
+      // set the force/torque stored in the block-local map onto all bodies
+      for( auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt )
       {
-         for( auto bodyIt = pe::LocalBodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         auto bodyForceTorqueStorage = blockIt->getData<ForceTorqueStorage_T>(bodyForceTorqueStorageID_);
+
+         for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
          {
-            const auto &f = bodyForceTorqueMap_[bodyIt->getSystemID()];
-            WALBERLA_ASSERT( !f.empty(), "When attempting to set force/torque on local body " << bodyIt->getSystemID() << " at position " << bodyIt->getPosition() << ", body was not found in map!");
-            bodyIt->addForce ( f[0], f[1], f[2] );
-            bodyIt->addTorque( f[3], f[4], f[5] );
+            const auto f = bodyForceTorqueStorage->find( bodyIt->getSystemID() );
+
+            if( f != bodyForceTorqueStorage->end() )
+            {
+               const auto & ftValues = f->second;
+               bodyIt->addForce ( ftValues[0], ftValues[1], ftValues[2] );
+               bodyIt->addTorque( ftValues[3], ftValues[4], ftValues[5] );
+            }
+            // else: new body has arrived that was not known before
          }
       }
    }
 
    void clear()
    {
-      bodyForceTorqueMap_.clear();
+      for( auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt )
+      {
+         auto bodyForceTorqueStorage = blockIt->getData<ForceTorqueStorage_T>(bodyForceTorqueStorageID_);
+         bodyForceTorqueStorage->clear();
+      }
    }
 
    void swap( BodiesForceTorqueContainer & other )
    {
-      std::swap( this->bodyForceTorqueMap_, other.bodyForceTorqueMap_ );
+      std::swap( bodyForceTorqueStorageID_, other.bodyForceTorqueStorageID_);
    }
 
 private:
 
-   shared_ptr<StructuredBlockStorage> blockStorage_;
+   shared_ptr<StructuredBlockStorage> blockForest_;
    const BlockDataID bodyStorageID_;
-   std::map< walberla::id_t, std::vector<real_t> > bodyForceTorqueMap_;
+   BlockDataID bodyForceTorqueStorageID_;
 };
 
 
 class BodyContainerSwapper
 {
 public:
-   BodyContainerSwapper( const shared_ptr<BodiesForceTorqueContainer> & cont1,
-                         const shared_ptr<BodiesForceTorqueContainer> & cont2 )
+   BodyContainerSwapper( const shared_ptr<BodiesForceTorqueContainer> & cont1, const shared_ptr<BodiesForceTorqueContainer> & cont2 )
    : cont1_( cont1 ), cont2_( cont2 )
    { }
 
diff --git a/src/pe_coupling/utility/BodySelectorFunctions.cpp b/src/pe_coupling/utility/BodySelectorFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c10f95028c47e88b3340aef74840337dc723992
--- /dev/null
+++ b/src/pe_coupling/utility/BodySelectorFunctions.cpp
@@ -0,0 +1,50 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BodySelectorFunctions.cpp
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "BodySelectorFunctions.h"
+
+#include "pe/rigidbody/RigidBody.h"
+
+namespace walberla {
+namespace pe_coupling {
+
+bool selectAllBodies(pe::BodyID /*bodyID*/)
+{
+   return true;
+}
+
+bool selectRegularBodies(pe::BodyID bodyID)
+{
+   return !bodyID->hasInfiniteMass() && !bodyID->isGlobal();
+}
+
+bool selectFixedBodies(pe::BodyID bodyID)
+{
+   return bodyID->hasInfiniteMass() && !bodyID->isGlobal();
+}
+
+bool selectGlobalBodies(pe::BodyID bodyID)
+{
+   return bodyID->isGlobal();
+}
+
+} // namespace pe_coupling
+} // namespace walberla
diff --git a/src/pe_coupling/utility/BodySelectorFunctions.h b/src/pe_coupling/utility/BodySelectorFunctions.h
index 75bc5c93b1e12c5c970c8b0f11556f08e5980d06..a811d416b2fe440d2ff4a0c062b18b7f0823eae2 100644
--- a/src/pe_coupling/utility/BodySelectorFunctions.h
+++ b/src/pe_coupling/utility/BodySelectorFunctions.h
@@ -21,29 +21,18 @@
 
 #pragma once
 
-#include "pe/Types.h"
+#include "pe/rigidbody/RigidBody.h"
 
 namespace walberla {
 namespace pe_coupling {
 
-bool selectAllBodies(pe::BodyID /*bodyID*/)
-{
-   return true;
-}
-
-bool selectRegularBodies(pe::BodyID bodyID)
-{
-   return !bodyID->hasInfiniteMass() && !bodyID->isGlobal();
-}
-
-bool selectFixedBodies(pe::BodyID bodyID)
-{
-   return bodyID->hasInfiniteMass() && !bodyID->isGlobal();
-}
-
-bool selectGlobalBodies(pe::BodyID bodyID)
-{
-   return bodyID->isGlobal();
-}
+bool selectAllBodies(pe::BodyID /*bodyID*/);
+
+bool selectRegularBodies(pe::BodyID bodyID);
+
+bool selectFixedBodies(pe::BodyID bodyID);
+
+bool selectGlobalBodies(pe::BodyID bodyID);
+
 } // namespace pe_coupling
 } // namespace walberla
diff --git a/src/pe_coupling/utility/TimeStep.h b/src/pe_coupling/utility/TimeStep.h
index d89e0a6e7aff337ba570c252e605fe4d006c8e9c..f32c306142dbe01c904f7e081e8aa49dd2cd3455 100644
--- a/src/pe_coupling/utility/TimeStep.h
+++ b/src/pe_coupling/utility/TimeStep.h
@@ -35,7 +35,7 @@
 #include <functional>
 
 #include <map>
-
+#include <array>
 
 namespace walberla {
 namespace pe_coupling {
@@ -88,43 +88,25 @@ public:
          // Since they are reset after the call to collision response, they have to be stored explicitly before.
          // Then they are set again after each intermediate step.
 
-         // sum up all forces/torques from shadow copies on local body (owner)
-         pe::reduceForces( blockStorage_->getBlockStorage(), bodyStorageID_ );
-         // send total forces/torques to shadow owners
-         pe::distributeForces( blockStorage_->getBlockStorage(), bodyStorageID_ );
-
          // generate map from all known bodies (process local) to total forces/torques
-         std::map< walberla::id_t, std::vector< real_t > > forceMap;
+         // this has to be done on a block-local basis, since the same body could reside on several blocks from this process
+         typedef domain_decomposition::IBlockID::IDType BlockID_T;
+         std::map< BlockID_T, std::map< walberla::id_t, std::array< real_t, 6 > > > forceTorqueMap;
+
          for( auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt )
          {
+            BlockID_T blockID = blockIt->getId().getID();
+            auto& blockLocalForceTorqueMap = forceTorqueMap[blockID];
+
             // iterate over local and remote bodies and store force/torque in map
-            // Remote bodies are required since during the then following collision response time steps,
-            // bodies can change ownership (i.e. a now remote body could become a local body ).
-            // Since only the owning process re-sets the force/torque later, remote body values have to be stored as well.
             for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
             {
-               auto & f = forceMap[ bodyIt->getSystemID() ];
+               auto & f = blockLocalForceTorqueMap[ bodyIt->getSystemID() ];
 
-               // only add if body has not been added already before (from another block)
-               if( f.empty() )
-               {
-                  const auto & force = bodyIt->getForce();
-                  f.push_back( force[0] );
-                  f.push_back( force[1] );
-                  f.push_back( force[2] );
-
-                  const auto & torque = bodyIt->getTorque();
-                  f.push_back( torque[0] );
-                  f.push_back( torque[1] );
-                  f.push_back( torque[2] );
-               }
+               const auto & force = bodyIt->getForce();
+               const auto & torque = bodyIt->getTorque();
 
-               // reset of force/torque on remote bodies necessary to erase the multiple occurrences of forces/torques on bodies
-               // (due to call to distributeForces() before)
-               if ( bodyIt->isRemote() )
-               {
-                  bodyIt->resetForceAndTorque();
-               }
+               f = {{force[0], force[1], force[2], torque[0], torque[1], torque[2] }};
             }
          }
 
@@ -133,18 +115,26 @@ public:
          for( uint_t i = 0; i != numberOfSubIterations_; ++i )
          {
 
-            // in the first iteration, forces on local bodies are already set by force synchronization
+            // in the first iteration, forces are already set
             if( i != 0 )
             {
                for( auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt )
                {
-                  // only owning process sets force/torque on bodies
-                  for( auto bodyIt = pe::LocalBodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+                  BlockID_T blockID = blockIt->getId().getID();
+                  auto& blockLocalForceTorqueMap = forceTorqueMap[blockID];
+
+                  // re-set stored force/torque on bodies
+                  for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
                   {
-                     const auto & f = forceMap[ bodyIt->getSystemID() ];
-                     WALBERLA_ASSERT( !f.empty(), "When attempting to set force/torque on local body " << bodyIt->getSystemID() << " at position " << bodyIt->getPosition() << ", body was not found in map!");
-                     bodyIt->addForce ( f[0], f[1], f[2] );
-                     bodyIt->addTorque( f[3], f[4], f[5] );
+
+                     const auto f = blockLocalForceTorqueMap.find( bodyIt->getSystemID() );
+
+                     if( f != blockLocalForceTorqueMap.end() )
+                     {
+                        const auto & ftValues = f->second;
+                        bodyIt->addForce ( ftValues[0], ftValues[1], ftValues[2] );
+                        bodyIt->addTorque( ftValues[3], ftValues[4], ftValues[5] );
+                     }
                   }
                }
             }
diff --git a/tests/core/timing/TimingTreeTest.cpp b/tests/core/timing/TimingTreeTest.cpp
index 862620f326fc8b0eeadcfb55c3b64c6221fd13be..74fa27e046074df0d4eb5923e0e0705545046854 100644
--- a/tests/core/timing/TimingTreeTest.cpp
+++ b/tests/core/timing/TimingTreeTest.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file TimingPoolTest.cpp
+//! \file TimingTreeTest.cpp
 //! \ingroup core
 //! \author Martin Bauer <martin.bauer@fau.de>
 //
@@ -67,12 +67,36 @@ int main( int argc, char ** argv )
    tt.stop("AC");
    tt.stop("A");
 
+   WALBERLA_ASSERT(tt.timerExists("A"));
+   WALBERLA_ASSERT(tt.timerExists("A.AA"));
+   WALBERLA_ASSERT(tt.timerExists("A.AB.ABA"));
+   WALBERLA_ASSERT(tt.timerExists("A.AB.ABB"));
+   WALBERLA_ASSERT(tt.timerExists("A.AC.ACA"));
+   WALBERLA_ASSERT(!tt.timerExists("AAC"));
+   WALBERLA_ASSERT(!tt.timerExists("A.AA.C"));
+
    // check copy constructor
    timing::TimingTree<timing::StaticPolicy> tt2(tt);
    // check assignment operator
    timing::TimingTree<timing::StaticPolicy> tt3;
    tt3 = tt;
 
+   WALBERLA_ASSERT(tt2.timerExists("A"));
+   WALBERLA_ASSERT(tt2.timerExists("A.AA"));
+   WALBERLA_ASSERT(tt2.timerExists("A.AB.ABA"));
+   WALBERLA_ASSERT(tt2.timerExists("A.AB.ABB"));
+   WALBERLA_ASSERT(tt2.timerExists("A.AC.ACA"));
+   WALBERLA_ASSERT(!tt2.timerExists("AAC"));
+   WALBERLA_ASSERT(!tt2.timerExists("A.AA.C"));
+
+   WALBERLA_ASSERT(tt3.timerExists("A"));
+   WALBERLA_ASSERT(tt3.timerExists("A.AA"));
+   WALBERLA_ASSERT(tt3.timerExists("A.AB.ABA"));
+   WALBERLA_ASSERT(tt3.timerExists("A.AB.ABB"));
+   WALBERLA_ASSERT(tt3.timerExists("A.AC.ACA"));
+   WALBERLA_ASSERT(!tt3.timerExists("AAC"));
+   WALBERLA_ASSERT(!tt3.timerExists("A.AA.C"));
+
    tt2 = tt.getReduced( timing::REDUCE_TOTAL, 0 );
    tt2 = tt.getReduced( timing::REDUCE_TOTAL, 1 );
 
diff --git a/tests/pe_coupling/CMakeLists.txt b/tests/pe_coupling/CMakeLists.txt
index 5ed391186bbc50828a59fb2b0a2312a914c3b101..81fc07d822a0453b9b12454a07508c646fe0aa42 100644
--- a/tests/pe_coupling/CMakeLists.txt
+++ b/tests/pe_coupling/CMakeLists.txt
@@ -162,4 +162,17 @@ waLBerla_execute_test( NAME HinderedSettlingDynamicsDPMFuncTest COMMAND $<TARGET
 ###################################################################################################
 
 waLBerla_compile_test( FILES geometry/PeIntersectionRatioTest.cpp DEPENDS pe )
-waLBerla_execute_test( NAME PeIntersectionRatioTest COMMAND $<TARGET_FILE:PeIntersectionRatioTest> PROCESSES 1 )
\ No newline at end of file
+waLBerla_execute_test( NAME PeIntersectionRatioTest COMMAND $<TARGET_FILE:PeIntersectionRatioTest> PROCESSES 1 )
+
+###################################################################################################
+# Utility tests
+###################################################################################################
+
+waLBerla_compile_test( FILES utility/BodiesForceTorqueContainerTest.cpp DEPENDS blockforest pe timeloop )
+waLBerla_execute_test( NAME BodiesForceTorqueContainerTest COMMAND $<TARGET_FILE:BodiesForceTorqueContainerTest> PROCESSES 1 )
+waLBerla_execute_test( NAME BodiesForceTorqueContainerParallelTest COMMAND $<TARGET_FILE:BodiesForceTorqueContainerTest> PROCESSES 3 )
+
+waLBerla_compile_test( FILES utility/PeSubCyclingTest.cpp DEPENDS blockforest pe timeloop )
+waLBerla_execute_test( NAME PeSubCyclingTest COMMAND $<TARGET_FILE:PeSubCyclingTest> PROCESSES 1 )
+waLBerla_execute_test( NAME PeSubCyclingParallelTest COMMAND $<TARGET_FILE:PeSubCyclingTest> PROCESSES 3 )
+
diff --git a/tests/pe_coupling/utility/BodiesForceTorqueContainerTest.cpp b/tests/pe_coupling/utility/BodiesForceTorqueContainerTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ed015b72f6c78f0ecf445ea4e7e8295a036f1d1e
--- /dev/null
+++ b/tests/pe_coupling/utility/BodiesForceTorqueContainerTest.cpp
@@ -0,0 +1,487 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BodiesForceTorqueContainerTest.cpp
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/all.h"
+
+#include "pe/basic.h"
+#include "pe/utility/DestroyBody.h"
+
+#include <pe_coupling/utility/all.h>
+
+namespace force_torque_container_test
+{
+
+///////////
+// USING //
+///////////
+
+using namespace walberla;
+
+typedef boost::tuple<pe::Sphere> BodyTypeTuple ;
+
+
+/*!\brief Test cases for the force torque container provided by the coupling module
+ *
+ * Spheres at different positions are created and force(s) and torque(s) are applied.
+ * Then, they are store in the container and reset on the body.
+ * Then, in some cases, the sphere is moved to cross block borders, and the stored forces/torques are reapplied by the container again.
+ * The obtained force/torque values are compared against the originally set (and thus expected) values.
+ */
+//////////
+// MAIN //
+//////////
+int main( int argc, char **argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+   // uncomment to have logging
+   //logging::Logging::instance()->setLogLevel(logging::Logging::LogLevel::DETAIL);
+
+   const real_t dx     = real_t(1);
+   const real_t radius = real_t(5);
+
+   ///////////////////////////
+   // DATA STRUCTURES SETUP //
+   ///////////////////////////
+
+   Vector3<uint_t> blocksPerDirection(uint_t(3), uint_t(1), uint_t(1));
+   Vector3<uint_t> cellsPerBlock(uint_t(20), uint_t(20), uint_t(20));
+   Vector3<bool> periodicity(true, false, false);
+
+   // create fully periodic domain with refined blocks
+   auto blocks = blockforest::createUniformBlockGrid( blocksPerDirection[0], blocksPerDirection[1], blocksPerDirection[2],
+                                                      cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2],
+                                                      dx,
+                                                      0, false, false,
+                                                      periodicity[0], periodicity[1], periodicity[2],
+                                                      false );
+
+
+   // pe body storage
+   pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
+   shared_ptr<pe::BodyStorage> globalBodyStorage = make_shared<pe::BodyStorage>();
+   auto bodyStorageID = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "Storage");
+   auto sphereMaterialID = pe::createMaterial( "sphereMat", real_t(1) , real_t(0.3), real_t(0.2), real_t(0.2), real_t(0.24), real_t(200), real_t(200), real_t(0), real_t(0) );
+
+   // pe coupling
+   const real_t overlap = real_t( 1.5 ) * dx;
+   std::function<void(void)> syncCall = std::bind( pe::syncNextNeighbors<BodyTypeTuple>, boost::ref(blocks->getBlockForest()), bodyStorageID, static_cast<WcTimingTree*>(NULL), overlap, false );
+
+   // sphere positions for test scenarios
+   Vector3<real_t> positionInsideBlock(real_t(10), real_t(10), real_t(10));
+   Vector3<real_t> positionAtBlockBorder(real_t(19.5), real_t(10), real_t(10));
+   Vector3<real_t> positionAtBlockBorderUpdated(real_t(20.5), real_t(10), real_t(10));
+
+   Vector3<real_t> positionAtBlockBorder2(real_t(20) + radius + overlap - real_t(0.5), real_t(10), real_t(10));
+   Vector3<real_t> positionAtBlockBorderUpdated2(real_t(20) + radius + overlap + real_t(0.5), real_t(10), real_t(10));
+
+
+   Vector3<real_t> testForce(real_t(2), real_t(1), real_t(0));
+   Vector3<real_t> torqueOffset = Vector3<real_t>(real_t(1), real_t(0), real_t(0));
+
+   pe_coupling::ForceTorqueOnBodiesResetter resetter(blocks, bodyStorageID);
+   shared_ptr<pe_coupling::BodiesForceTorqueContainer> container1 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID);
+   shared_ptr<pe_coupling::BodiesForceTorqueContainer> container2 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID);
+   pe_coupling::BodyContainerSwapper containerSwapper(container1, container2);
+
+   //////////////////
+   // Inside block //
+   //////////////////
+   {
+      std::string testIdentifier("Test: sphere inside block");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       positionInsideBlock, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      uint_t applicationCounter( 0 );
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            ++applicationCounter;
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+      mpi::allReduceInplace(applicationCounter, mpi::SUM);
+
+      container1->store();
+
+      resetter();
+
+      containerSwapper();
+
+      container2->setOnBodies();
+
+      Vector3<real_t> expectedForce = applicationCounter * testForce;
+      Vector3<real_t> expectedTorque = applicationCounter * ( torqueOffset % testForce );
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting force: " << expectedForce);
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting torque: " << expectedTorque);
+
+      Vector3<real_t> actingForce(real_t(0));
+      Vector3<real_t> actingTorque(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            actingForce += bodyIt->getForce();
+            actingTorque += bodyIt->getTorque();
+
+         }
+      }
+
+      mpi::allReduceInplace(actingForce[0], mpi::SUM);
+      mpi::allReduceInplace(actingForce[1], mpi::SUM);
+      mpi::allReduceInplace(actingForce[2], mpi::SUM);
+
+      mpi::allReduceInplace(actingTorque[0], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[1], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[2], mpi::SUM);
+
+      WALBERLA_ROOT_SECTION()
+      {
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[0], expectedForce[0], "Mismatch in force0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[1], expectedForce[1], "Mismatch in force1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[2], expectedForce[2], "Mismatch in force2");
+
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[0], expectedTorque[0], "Mismatch in torque0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[1], expectedTorque[1], "Mismatch in torque1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[2], expectedTorque[2], "Mismatch in torque2");
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      container1->clear();
+      container2->clear();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   /////////////////////
+   // At block border //
+   /////////////////////
+   {
+      std::string testIdentifier("Test: sphere at block border");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       positionAtBlockBorder, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      uint_t applicationCounter( 0 );
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            ++applicationCounter;
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+      mpi::allReduceInplace(applicationCounter, mpi::SUM);
+
+      container1->store();
+
+      resetter();
+
+      containerSwapper();
+
+      container2->setOnBodies();
+
+      Vector3<real_t> expectedForce = applicationCounter * testForce;
+      Vector3<real_t> expectedTorque = applicationCounter * ( torqueOffset % testForce );
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting force: " << expectedForce);
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting torque: " << expectedTorque);
+
+      Vector3<real_t> actingForce(real_t(0));
+      Vector3<real_t> actingTorque(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            actingForce += bodyIt->getForce();
+            actingTorque += bodyIt->getTorque();
+         }
+      }
+
+      mpi::allReduceInplace(actingForce[0], mpi::SUM);
+      mpi::allReduceInplace(actingForce[1], mpi::SUM);
+      mpi::allReduceInplace(actingForce[2], mpi::SUM);
+
+      mpi::allReduceInplace(actingTorque[0], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[1], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[2], mpi::SUM);
+
+      WALBERLA_ROOT_SECTION()
+      {
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[0], expectedForce[0], "Mismatch in force0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[1], expectedForce[1], "Mismatch in force1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[2], expectedForce[2], "Mismatch in force2");
+
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[0], expectedTorque[0], "Mismatch in torque0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[1], expectedTorque[1], "Mismatch in torque1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[2], expectedTorque[2], "Mismatch in torque2");
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      container1->clear();
+      container2->clear();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   ///////////////////////////////////////////
+   // At block border with updated position //
+   ///////////////////////////////////////////
+   {
+      std::string testIdentifier("Test: sphere at block border with updated position");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       positionAtBlockBorder, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      uint_t applicationCounter( 0 );
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            ++applicationCounter;
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+      mpi::allReduceInplace(applicationCounter, mpi::SUM);
+
+      container1->store();
+
+      resetter();
+
+      containerSwapper();
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) {
+         for (auto bodyIt = pe::LocalBodyIterator::begin(*blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt) {
+            bodyIt->setPosition(positionAtBlockBorderUpdated);
+         }
+      }
+      syncCall();
+
+      container2->setOnBodies();
+
+      Vector3<real_t> expectedForce = applicationCounter * testForce;
+      Vector3<real_t> expectedTorque = applicationCounter * ( torqueOffset % testForce );
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting force: " << expectedForce);
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting torque: " << expectedTorque);
+
+      Vector3<real_t> actingForce(real_t(0));
+      Vector3<real_t> actingTorque(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            actingForce += bodyIt->getForce();
+            actingTorque += bodyIt->getTorque();
+         }
+      }
+
+      mpi::allReduceInplace(actingForce[0], mpi::SUM);
+      mpi::allReduceInplace(actingForce[1], mpi::SUM);
+      mpi::allReduceInplace(actingForce[2], mpi::SUM);
+
+      mpi::allReduceInplace(actingTorque[0], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[1], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[2], mpi::SUM);
+
+      WALBERLA_ROOT_SECTION()
+      {
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[0], expectedForce[0], "Mismatch in force0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[1], expectedForce[1], "Mismatch in force1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[2], expectedForce[2], "Mismatch in force2");
+
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[0], expectedTorque[0], "Mismatch in torque0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[1], expectedTorque[1], "Mismatch in torque1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[2], expectedTorque[2], "Mismatch in torque2");
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      container1->clear();
+      container2->clear();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   /////////////////////////////////////////////
+   // At block border with updated position 2 //
+   /////////////////////////////////////////////
+   {
+      std::string testIdentifier("Test: sphere at block border with updated position 2");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       positionAtBlockBorder2, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      uint_t applicationCounter( 0 );
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            ++applicationCounter;
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+      mpi::allReduceInplace(applicationCounter, mpi::SUM);
+
+      container1->store();
+
+      resetter();
+
+      containerSwapper();
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) {
+         for (auto bodyIt = pe::LocalBodyIterator::begin(*blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt) {
+            bodyIt->setPosition(positionAtBlockBorderUpdated2);
+         }
+      }
+      syncCall();
+
+      container2->setOnBodies();
+
+      --applicationCounter; // sphere has vanished from one block
+
+      // in this case, the complete force can no longer be applied onto the body since the part from the block
+      // from which the body vanished is lost
+
+      // HOWEVER: this case will never appear in a coupled simulation since NO FORCE will be acting a body
+      // that is about to vanish from a block since it will not be mapped to the block from which it will vanish
+
+      // If you are expecting a different behavior, you need to change the container mechanism by first all-reducing the
+      // forces/torques to all processes/blocks that know this body such that every process/block has
+      // the full information and then the process/block that owns this body sets the complete force
+
+      Vector3<real_t> expectedForce = applicationCounter * testForce;
+      Vector3<real_t> expectedTorque = applicationCounter * ( torqueOffset % testForce );
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting force: " << expectedForce);
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting torque: " << expectedTorque);
+
+      Vector3<real_t> actingForce(real_t(0));
+      Vector3<real_t> actingTorque(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            actingForce += bodyIt->getForce();
+            actingTorque += bodyIt->getTorque();
+         }
+      }
+
+      mpi::allReduceInplace(actingForce[0], mpi::SUM);
+      mpi::allReduceInplace(actingForce[1], mpi::SUM);
+      mpi::allReduceInplace(actingForce[2], mpi::SUM);
+
+      mpi::allReduceInplace(actingTorque[0], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[1], mpi::SUM);
+      mpi::allReduceInplace(actingTorque[2], mpi::SUM);
+
+      WALBERLA_ROOT_SECTION()
+      {
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[0], expectedForce[0], "Mismatch in force0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[1], expectedForce[1], "Mismatch in force1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingForce[2], expectedForce[2], "Mismatch in force2");
+
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[0], expectedTorque[0], "Mismatch in torque0");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[1], expectedTorque[1], "Mismatch in torque1");
+         WALBERLA_CHECK_FLOAT_EQUAL(actingTorque[2], expectedTorque[2], "Mismatch in torque2");
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      container1->clear();
+      container2->clear();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   return 0;
+
+}
+
+} //namespace force_torque_container_test
+
+int main( int argc, char **argv ){
+   force_torque_container_test::main(argc, argv);
+}
diff --git a/tests/pe_coupling/utility/PeSubCyclingTest.cpp b/tests/pe_coupling/utility/PeSubCyclingTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c21cdff12ee606166f60c42cc53e9a42435c2146
--- /dev/null
+++ b/tests/pe_coupling/utility/PeSubCyclingTest.cpp
@@ -0,0 +1,455 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PeSubCyclingTest.cpp
+//! \ingroup pe_coupling
+//! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+
+#include "core/DataTypes.h"
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/all.h"
+
+#include "pe/basic.h"
+#include "pe/utility/DestroyBody.h"
+#include "pe/cr/DEM.h"
+
+#include <pe_coupling/utility/all.h>
+
+namespace pe_sub_cycling_test
+{
+
+///////////
+// USING //
+///////////
+
+using namespace walberla;
+
+typedef boost::tuple<pe::Sphere> BodyTypeTuple ;
+
+/*!\brief test case to check functionality of sub cycling in the pe time step provided by the coupling module
+ *
+ * During this time step, currently acting forces/torques on a body are kept constant.
+ *
+ * This test first computes the resulting position offset as well as the linear and rotational velocity after a
+ * certain force has been applied to a sphere when using several 'real' pe time steps and re-applying the force 10 times.
+ *
+ * It then checks the pe time step sub-cycling functionality of the coupling module by creating same-sized spheres
+ * at different locations and applying the same force. But this time, 10 sub-cycles are carried out.
+ * As a result, the same position offset and velocities must be obtained as in the regular case.
+ *
+ */
+//////////
+// MAIN //
+//////////
+int main( int argc, char **argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+   // uncomment to have logging
+   //logging::Logging::instance()->setLogLevel(logging::Logging::LogLevel::DETAIL);
+
+   const real_t dx     = real_t(1);
+   const real_t radius = real_t(5);
+
+   ///////////////////////////
+   // DATA STRUCTURES SETUP //
+   ///////////////////////////
+
+   Vector3<uint_t> blocksPerDirection(uint_t(3), uint_t(1), uint_t(1));
+   Vector3<uint_t> cellsPerBlock(uint_t(20), uint_t(20), uint_t(20));
+   Vector3<bool> periodicity(true, false, false);
+
+   // create fully periodic domain with refined blocks
+   auto blocks = blockforest::createUniformBlockGrid( blocksPerDirection[0], blocksPerDirection[1], blocksPerDirection[2],
+                                                      cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2],
+                                                      dx,
+                                                      0, false, false,
+                                                      periodicity[0], periodicity[1], periodicity[2],
+                                                      false );
+
+
+   // pe body storage
+   pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
+   shared_ptr<pe::BodyStorage> globalBodyStorage = make_shared<pe::BodyStorage>();
+   auto bodyStorageID = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "Storage");
+   auto sphereMaterialID = pe::createMaterial( "sphereMat", real_t(1) , real_t(0.3), real_t(0.2), real_t(0.2), real_t(0.24), real_t(200), real_t(200), real_t(0), real_t(0) );
+
+   auto ccdID = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD");
+   auto fcdID = blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD");
+   pe::cr::DEM cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID, NULL);
+
+   // set up synchronization procedure
+   const real_t overlap = real_t( 1.5 ) * dx;
+   std::function<void(void)> syncCall = std::bind( pe::syncNextNeighbors<BodyTypeTuple>, boost::ref(blocks->getBlockForest()), bodyStorageID, static_cast<WcTimingTree*>(NULL), overlap, false );
+
+
+   // sphere positions for test scenarios
+   Vector3<real_t> positionInsideBlock(real_t(10), real_t(10), real_t(10));
+   Vector3<real_t> positionAtBlockBorder(real_t(19.9), real_t(10), real_t(10));
+   Vector3<real_t> positionAtBlockBorder2(real_t(20) + radius + overlap - real_t(0.1), real_t(10), real_t(10));
+
+   Vector3<real_t> testForce(real_t(2), real_t(1), real_t(0));
+   Vector3<real_t> torqueOffset = Vector3<real_t>(real_t(1), real_t(0), real_t(0));
+
+   uint_t peSubCycles( 10 );
+   real_t dtPe( real_t(10) );
+   real_t dtPeSubCycle = dtPe / real_c(peSubCycles);
+
+   pe_coupling::TimeStep timestep(blocks, bodyStorageID, cr, syncCall, dtPe, peSubCycles);
+
+   // evaluate how far the sphere will travel with a specific force applied which is the reference distance for later
+   // (depends on the chosen time integrator in the DEM and thus can not generally be computed a priori here)
+
+   Vector3<real_t> expectedPosOffset(real_t(0));
+   Vector3<real_t> expectedLinearVel(real_t(0));
+   Vector3<real_t> expectedAngularVel(real_t(0));
+   {
+
+      Vector3<real_t> startPos = positionInsideBlock;
+
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       startPos, radius, sphereMaterialID, false, true, false);
+
+      for( uint_t t = 0; t < peSubCycles; ++t )
+      {
+         for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+         {
+            for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+            {
+               bodyIt->addForceAtPos(testForce, bodyIt->getPosition() + torqueOffset);
+            }
+         }
+         cr.timestep(dtPeSubCycle);
+         syncCall();
+      }
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            expectedPosOffset = bodyIt->getPosition() - startPos;
+            expectedLinearVel = bodyIt->getLinearVel();
+            expectedAngularVel = bodyIt->getAngularVel();
+         }
+      }
+
+      mpi::allReduceInplace(expectedPosOffset[0], mpi::SUM);
+      mpi::allReduceInplace(expectedPosOffset[1], mpi::SUM);
+      mpi::allReduceInplace(expectedPosOffset[2], mpi::SUM);
+      mpi::allReduceInplace(expectedLinearVel[0], mpi::SUM);
+      mpi::allReduceInplace(expectedLinearVel[1], mpi::SUM);
+      mpi::allReduceInplace(expectedLinearVel[2], mpi::SUM);
+      mpi::allReduceInplace(expectedAngularVel[0], mpi::SUM);
+      mpi::allReduceInplace(expectedAngularVel[1], mpi::SUM);
+      mpi::allReduceInplace(expectedAngularVel[2], mpi::SUM);
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting position offset: " << expectedPosOffset);
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting linear vel: " << expectedLinearVel);
+      WALBERLA_LOG_DEVEL_ON_ROOT(" - expecting angular vel: " << expectedAngularVel);
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+   }
+
+   //////////////////
+   // Inside block //
+   //////////////////
+   {
+      std::string testIdentifier("Test: sphere inside block");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+
+      Vector3<real_t> startPos = positionInsideBlock;
+
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       startPos, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+
+      timestep();
+
+
+      Vector3<real_t> curPosOffset(real_t(0));
+      Vector3<real_t> curLinearVel(real_t(0));
+      Vector3<real_t> curAngularVel(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            curPosOffset = bodyIt->getPosition() - startPos;
+            curLinearVel = bodyIt->getLinearVel();
+            curAngularVel = bodyIt->getAngularVel();
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[0], expectedPosOffset[0], "Mismatch in posOffset0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[1], expectedPosOffset[1], "Mismatch in posOffset1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[2], expectedPosOffset[2], "Mismatch in posOffset2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[0], expectedLinearVel[0], "Mismatch in linearVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[1], expectedLinearVel[1], "Mismatch in linearVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[2], expectedLinearVel[2], "Mismatch in linearVel2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[0], expectedAngularVel[0], "Mismatch in angularVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[1], expectedAngularVel[1], "Mismatch in angularVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[2], expectedAngularVel[2], "Mismatch in angularVel2");
+
+
+         }
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   ///////////////////////
+   // At block border 1 //
+   ///////////////////////
+   {
+      std::string testIdentifier("Test: sphere at block border 1");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+
+      Vector3<real_t> startPos = positionAtBlockBorder;
+
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       startPos, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+
+      timestep();
+
+
+      Vector3<real_t> curPosOffset(real_t(0));
+      Vector3<real_t> curLinearVel(real_t(0));
+      Vector3<real_t> curAngularVel(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            curPosOffset = bodyIt->getPosition() - startPos;
+            curLinearVel = bodyIt->getLinearVel();
+            curAngularVel = bodyIt->getAngularVel();
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[0], expectedPosOffset[0], "Mismatch in posOffset0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[1], expectedPosOffset[1], "Mismatch in posOffset1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[2], expectedPosOffset[2], "Mismatch in posOffset2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[0], expectedLinearVel[0], "Mismatch in linearVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[1], expectedLinearVel[1], "Mismatch in linearVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[2], expectedLinearVel[2], "Mismatch in linearVel2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[0], expectedAngularVel[0], "Mismatch in angularVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[1], expectedAngularVel[1], "Mismatch in angularVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[2], expectedAngularVel[2], "Mismatch in angularVel2");
+
+         }
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   ////////////////////////////
+   // At block border 1, mod //
+   ////////////////////////////
+   {
+      std::string testIdentifier("Test: sphere at block border 1 mod");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+
+      Vector3<real_t> startPos = positionAtBlockBorder;
+
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       startPos, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      // also add on shadow copy, but only half of it to have same total force/torque
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(real_t(0.5)*testForce, pos+torqueOffset);
+         }
+      }
+
+      timestep();
+
+
+      Vector3<real_t> curPosOffset(real_t(0));
+      Vector3<real_t> curLinearVel(real_t(0));
+      Vector3<real_t> curAngularVel(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            curPosOffset = bodyIt->getPosition() - startPos;
+            curLinearVel = bodyIt->getLinearVel();
+            curAngularVel = bodyIt->getAngularVel();
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[0], expectedPosOffset[0], "Mismatch in posOffset0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[1], expectedPosOffset[1], "Mismatch in posOffset1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[2], expectedPosOffset[2], "Mismatch in posOffset2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[0], expectedLinearVel[0], "Mismatch in linearVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[1], expectedLinearVel[1], "Mismatch in linearVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[2], expectedLinearVel[2], "Mismatch in linearVel2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[0], expectedAngularVel[0], "Mismatch in angularVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[1], expectedAngularVel[1], "Mismatch in angularVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[2], expectedAngularVel[2], "Mismatch in angularVel2");
+
+         }
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   ///////////////////////
+   // At block border 2 //
+   ///////////////////////
+   {
+      std::string testIdentifier("Test: sphere at block border 2");
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - started");
+
+      Vector3<real_t> startPos = positionAtBlockBorder2;
+
+      pe::createSphere(*globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0,
+                       startPos, radius, sphereMaterialID, false, true, false);
+
+      syncCall();
+
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            auto pos = bodyIt->getPosition();
+            bodyIt->addForceAtPos(testForce, pos+torqueOffset);
+         }
+      }
+
+      timestep();
+
+
+      Vector3<real_t> curPosOffset(real_t(0));
+      Vector3<real_t> curLinearVel(real_t(0));
+      Vector3<real_t> curAngularVel(real_t(0));
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+         {
+            curPosOffset = bodyIt->getPosition() - startPos;
+            curLinearVel = bodyIt->getLinearVel();
+            curAngularVel = bodyIt->getAngularVel();
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[0], expectedPosOffset[0], "Mismatch in posOffset0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[1], expectedPosOffset[1], "Mismatch in posOffset1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curPosOffset[2], expectedPosOffset[2], "Mismatch in posOffset2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[0], expectedLinearVel[0], "Mismatch in linearVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[1], expectedLinearVel[1], "Mismatch in linearVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curLinearVel[2], expectedLinearVel[2], "Mismatch in linearVel2");
+
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[0], expectedAngularVel[0], "Mismatch in angularVel0");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[1], expectedAngularVel[1], "Mismatch in angularVel1");
+            WALBERLA_CHECK_FLOAT_EQUAL(curAngularVel[2], expectedAngularVel[2], "Mismatch in angularVel2");
+
+         }
+      }
+
+      // clean up
+      for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+      {
+         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt )
+         {
+            bodyIt->markForDeletion();
+         }
+      }
+      syncCall();
+
+      WALBERLA_LOG_DEVEL_ON_ROOT(testIdentifier << " - ended");
+
+   }
+
+   return 0;
+
+}
+
+} //namespace pe_sub_cycling_test
+
+int main( int argc, char **argv ){
+   pe_sub_cycling_test::main(argc, argv);
+}