diff --git a/src/pe_coupling/utility/TimeStep.h b/src/pe_coupling/utility/TimeStep.h
index d89e0a6e7aff337ba570c252e605fe4d006c8e9c..f32c306142dbe01c904f7e081e8aa49dd2cd3455 100644
--- a/src/pe_coupling/utility/TimeStep.h
+++ b/src/pe_coupling/utility/TimeStep.h
@@ -35,7 +35,7 @@
 #include <functional>
 
 #include <map>
-
+#include <array>
 
 namespace walberla {
 namespace pe_coupling {
@@ -88,43 +88,25 @@ public:
          // Since they are reset after the call to collision response, they have to be stored explicitly before.
          // Then they are set again after each intermediate step.
 
-         // sum up all forces/torques from shadow copies on local body (owner)
-         pe::reduceForces( blockStorage_->getBlockStorage(), bodyStorageID_ );
-         // send total forces/torques to shadow owners
-         pe::distributeForces( blockStorage_->getBlockStorage(), bodyStorageID_ );
-
          // generate map from all known bodies (process local) to total forces/torques
-         std::map< walberla::id_t, std::vector< real_t > > forceMap;
+         // this has to be done on a block-local basis, since the same body could reside on several blocks from this process
+         typedef domain_decomposition::IBlockID::IDType BlockID_T;
+         std::map< BlockID_T, std::map< walberla::id_t, std::array< real_t, 6 > > > forceTorqueMap;
+
          for( auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt )
          {
+            BlockID_T blockID = blockIt->getId().getID();
+            auto& blockLocalForceTorqueMap = forceTorqueMap[blockID];
+
             // iterate over local and remote bodies and store force/torque in map
-            // Remote bodies are required since during the then following collision response time steps,
-            // bodies can change ownership (i.e. a now remote body could become a local body ).
-            // Since only the owning process re-sets the force/torque later, remote body values have to be stored as well.
             for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
             {
-               auto & f = forceMap[ bodyIt->getSystemID() ];
+               auto & f = blockLocalForceTorqueMap[ bodyIt->getSystemID() ];
 
-               // only add if body has not been added already before (from another block)
-               if( f.empty() )
-               {
-                  const auto & force = bodyIt->getForce();
-                  f.push_back( force[0] );
-                  f.push_back( force[1] );
-                  f.push_back( force[2] );
-
-                  const auto & torque = bodyIt->getTorque();
-                  f.push_back( torque[0] );
-                  f.push_back( torque[1] );
-                  f.push_back( torque[2] );
-               }
+               const auto & force = bodyIt->getForce();
+               const auto & torque = bodyIt->getTorque();
 
-               // reset of force/torque on remote bodies necessary to erase the multiple occurrences of forces/torques on bodies
-               // (due to call to distributeForces() before)
-               if ( bodyIt->isRemote() )
-               {
-                  bodyIt->resetForceAndTorque();
-               }
+               f = {{force[0], force[1], force[2], torque[0], torque[1], torque[2] }};
             }
          }
 
@@ -133,18 +115,26 @@ public:
          for( uint_t i = 0; i != numberOfSubIterations_; ++i )
          {
 
-            // in the first iteration, forces on local bodies are already set by force synchronization
+            // in the first iteration, forces are already set
             if( i != 0 )
             {
                for( auto blockIt = blockStorage_->begin(); blockIt != blockStorage_->end(); ++blockIt )
                {
-                  // only owning process sets force/torque on bodies
-                  for( auto bodyIt = pe::LocalBodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt )
+                  BlockID_T blockID = blockIt->getId().getID();
+                  auto& blockLocalForceTorqueMap = forceTorqueMap[blockID];
+
+                  // re-set stored force/torque on bodies
+                  for( auto bodyIt = pe::BodyIterator::begin(*blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
                   {
-                     const auto & f = forceMap[ bodyIt->getSystemID() ];
-                     WALBERLA_ASSERT( !f.empty(), "When attempting to set force/torque on local body " << bodyIt->getSystemID() << " at position " << bodyIt->getPosition() << ", body was not found in map!");
-                     bodyIt->addForce ( f[0], f[1], f[2] );
-                     bodyIt->addTorque( f[3], f[4], f[5] );
+
+                     const auto f = blockLocalForceTorqueMap.find( bodyIt->getSystemID() );
+
+                     if( f != blockLocalForceTorqueMap.end() )
+                     {
+                        const auto & ftValues = f->second;
+                        bodyIt->addForce ( ftValues[0], ftValues[1], ftValues[2] );
+                        bodyIt->addTorque( ftValues[3], ftValues[4], ftValues[5] );
+                     }
                   }
                }
             }