diff --git a/python/mesa_pd.py b/python/mesa_pd.py
index 401fc3bc7b00f0870367758a68db435e5afa727c..7e807749289873c6d30073805d73b8c32f86f6d5 100755
--- a/python/mesa_pd.py
+++ b/python/mesa_pd.py
@@ -90,7 +90,11 @@ if __name__ == '__main__':
    kernels.append( kernel.ExplicitEuler() )
    kernels.append( kernel.ExplicitEulerWithShape() )
    kernels.append( kernel.ForceLJ() )
+   kernels.append( kernel.HCSITSRelaxationStep() )
    kernels.append( kernel.HeatConduction() )
+   kernels.append( kernel.InitParticlesForHCSITS() )
+   kernels.append( kernel.InitContactsForHCSITS() )
+   kernels.append( kernel.IntegrateParticlesHCSITS() )
    kernels.append( kernel.InsertParticleIntoLinkedCells() )
    kernels.append( kernel.LinearSpringDashpot() )
    kernels.append( kernel.NonLinearSpringDashpot() )
diff --git a/python/mesa_pd/kernel/HCSITSRelaxationStep.py b/python/mesa_pd/kernel/HCSITSRelaxationStep.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb80d20ac32ea60971076fa75d351a7110fa5d84
--- /dev/null
+++ b/python/mesa_pd/kernel/HCSITSRelaxationStep.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+from mesa_pd.accessor import Accessor
+from ..Container import Container
+from mesa_pd.utility import generateFile
+
+class HCSITSRelaxationStep(Container):
+   def __init__(self):
+      super().__init__()
+      self.addProperty("maxSubIterations", "size_t", defValue = "20")
+      self.addProperty("relaxationModel", "RelaxationModel", defValue = "InelasticFrictionlessContact")
+      self.addProperty("deltaMax", "real_t", defValue = "0")
+      self.addProperty("cor", "real_t", defValue = "real_t(0.2)")
+
+      self.accessor = Accessor()
+      self.accessor.require("uid",             "walberla::id_t",                                    access="g")
+      self.accessor.require("position",        "walberla::mesa_pd::Vec3",                           access="g")
+      self.accessor.require("linearVelocity",  "walberla::mesa_pd::Vec3",                           access="g")
+      self.accessor.require("angularVelocity", "walberla::mesa_pd::Vec3",                           access="g")
+      self.accessor.require("invMass",         "walberla::real_t",                                  access="g" )
+      self.accessor.require("invInertia",      "walberla::mesa_pd::Mat3",                           access="g" )
+      self.accessor.require("dv",              "walberla::mesa_pd::Vec3",                           access="gr" )
+      self.accessor.require("dw",              "walberla::mesa_pd::Vec3",                           access="gr" )
+
+   def getRequirements(self):
+      return self.accessor
+
+   def generate(self, path):
+      context = dict()
+      context["properties"]      = self.properties
+      context["interface"]        = self.accessor.properties
+      generateFile(path, 'kernel/HCSITSRelaxationStep.templ.h', context)
diff --git a/python/mesa_pd/kernel/InitContactsForHCSITS.py b/python/mesa_pd/kernel/InitContactsForHCSITS.py
new file mode 100644
index 0000000000000000000000000000000000000000..54da9536a4a2f3569a429357abd02bd7466974a3
--- /dev/null
+++ b/python/mesa_pd/kernel/InitContactsForHCSITS.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+from mesa_pd.accessor import Accessor
+from ..Container import Container
+from mesa_pd.utility import generateFile
+
+class InitContactsForHCSITS(Container):
+   def __init__(self):
+      super().__init__()
+      self.addProperty("erp", "real_t", defValue = "real_t(0.8)")
+      self.addProperty("maximumPenetration", "real_t", defValue ="0")
+
+      self.paccessor = Accessor()
+      self.paccessor.require("uid",             "walberla::id_t",                                    access="g")
+      self.paccessor.require("position",        "walberla::mesa_pd::Vec3",                           access="g")
+      self.paccessor.require("invInertia",      "walberla::mesa_pd::Mat3",                           access="g" )
+      self.paccessor.require("invMass",         "walberla::real_t",                                  access="g" )
+
+
+   def getRequirements(self):
+      return self.paccessor
+
+   def generate(self, path):
+      context = dict()
+      context["properties"]      = self.properties
+      context["material_parameters"] = ["friction"]
+      context["interface"]        = self.paccessor.properties
+      generateFile(path, 'kernel/InitContactsForHCSITS.templ.h', context)
diff --git a/python/mesa_pd/kernel/InitParticlesForHCSITS.py b/python/mesa_pd/kernel/InitParticlesForHCSITS.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca0148cab20c55575aff787611076319ac2fe121
--- /dev/null
+++ b/python/mesa_pd/kernel/InitParticlesForHCSITS.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+from mesa_pd.accessor import Accessor
+from ..Container import Container
+from mesa_pd.utility import generateFile
+
+class InitParticlesForHCSITS(Container):
+   def __init__(self):
+      super().__init__()
+      self.addProperty("globalAcceleration", "walberla::mesa_pd::Vec3", defValue = "0")
+
+      self.accessor = Accessor()
+      self.accessor.require("uid",             "walberla::id_t",                                    access="g")
+      self.accessor.require("linearVelocity",  "walberla::mesa_pd::Vec3",                           access="gr")
+      self.accessor.require("angularVelocity", "walberla::mesa_pd::Vec3",                           access="gr")
+      self.accessor.require("invMass",         "walberla::real_t",                                  access="g" )
+      self.accessor.require("inertia",         "walberla::mesa_pd::Mat3",                           access="g" )
+      self.accessor.require("invInertia",      "walberla::mesa_pd::Mat3",                           access="g" )
+      self.accessor.require("dv",              "walberla::mesa_pd::Vec3",                           access="gr" )
+      self.accessor.require("dw",              "walberla::mesa_pd::Vec3",                           access="gr" )
+      self.accessor.require("torque",          "walberla::mesa_pd::Vec3",                           access="r" )
+      self.accessor.require("force",           "walberla::mesa_pd::Vec3",                           access="r" )
+
+   def getRequirements(self):
+      return self.accessor
+
+   def generate(self, path):
+      context = dict()
+      context["properties"]      = self.properties
+      context["interface"]       = self.accessor.properties
+      generateFile(path, 'kernel/InitParticlesForHCSITS.templ.h', context)
diff --git a/python/mesa_pd/kernel/IntegrateParticlesHCSITS.py b/python/mesa_pd/kernel/IntegrateParticlesHCSITS.py
new file mode 100644
index 0000000000000000000000000000000000000000..b869d5decd30a3cf05a58981544d1309545f93f7
--- /dev/null
+++ b/python/mesa_pd/kernel/IntegrateParticlesHCSITS.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+from mesa_pd.accessor import Accessor
+from ..Container import Container
+from mesa_pd.utility import generateFile
+
+class IntegrateParticlesHCSITS(Container):
+   def __init__(self):
+      super().__init__()
+      self.addProperty("speedLimiterActive", "bool", defValue = "false")
+      self.addProperty("speedLimitFactor",    "real_t", defValue ="real_t(1.0)")
+
+      self.accessor = Accessor()
+      self.accessor.require("uid",              "walberla::id_t",                                   access="g")
+      self.accessor.require("position",         "walberla::mesa_pd::Vec3",                          access="gr")
+      self.accessor.require("rotation",         "walberla::mesa_pd::Rot3",                          access="gr")
+      self.accessor.require("linearVelocity",   "walberla::mesa_pd::Vec3",                          access="gr")
+      self.accessor.require("angularVelocity",  "walberla::mesa_pd::Vec3",                          access="gr")
+      self.accessor.require("dv",               "walberla::mesa_pd::Vec3",                          access="g" )
+      self.accessor.require("dw",               "walberla::mesa_pd::Vec3",                          access="g" )
+      self.accessor.require("flags",            "walberla::mesa_pd::data::particle_flags::FlagT",   access="g")
+
+   def getRequirements(self):
+      return self.accessor
+
+   def generate(self, path):
+      context = dict()
+      context["properties"]      = self.properties
+      context["interface"]       = self.accessor.properties
+      generateFile(path, 'kernel/IntegrateParticlesHCSITS.templ.h', context)
diff --git a/python/mesa_pd/kernel/__init__.py b/python/mesa_pd/kernel/__init__.py
index 9ce98cad54556762bed4c4abd4c6798fa10b7694..28d7b5a1bc72c401ce59cabc3338fda64b1d3a91 100644
--- a/python/mesa_pd/kernel/__init__.py
+++ b/python/mesa_pd/kernel/__init__.py
@@ -4,7 +4,11 @@ from .DoubleCast import DoubleCast
 from .ExplicitEuler import ExplicitEuler
 from .ExplicitEulerWithShape import ExplicitEulerWithShape
 from .ForceLJ import ForceLJ
+from .HCSITSRelaxationStep import HCSITSRelaxationStep
 from .HeatConduction import HeatConduction
+from .InitParticlesForHCSITS import InitParticlesForHCSITS
+from .InitContactsForHCSITS import InitContactsForHCSITS
+from .IntegrateParticlesHCSITS import IntegrateParticlesHCSITS
 from .InsertParticleIntoLinkedCells import InsertParticleIntoLinkedCells
 from .LinearSpringDashpot import LinearSpringDashpot
 from .NonLinearSpringDashpot import NonLinearSpringDashpot
@@ -19,7 +23,11 @@ __all__ = ['DoubleCast',
            'ExplicitEuler',
            'ExplicitEulerWithShape',
            'ForceLJ',
+           'HCSITSRelaxationStep',
            'HeatConduction',
+           'InitParticlesForHCSITS',
+           'InitContactsForHCSITS',
+           'IntegrateParticlesHCSITS',
            'InsertParticleIntoLinkedCells',
            'LinearSpringDashpot',
            'NonLinearSpringDashpot',
diff --git a/python/mesa_pd/templates/data/ContactAccessor.templ.h b/python/mesa_pd/templates/data/ContactAccessor.templ.h
index 50190bf636f3e81461634778c161d5abaafc6554..b51f5de7fd5f4f21d7f58f504da39043a652aeb0 100644
--- a/python/mesa_pd/templates/data/ContactAccessor.templ.h
+++ b/python/mesa_pd/templates/data/ContactAccessor.templ.h
@@ -26,7 +26,7 @@
 
 #pragma once
 
-#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/IContactAccessor.h>
 #include <mesa_pd/data/ContactStorage.h>
 
 #include <core/UniqueID.h>
@@ -43,7 +43,7 @@ namespace data {
  * Provides get, set and getRef for all members of the ContactStorage.
  * Can be used as a basis class for a more advanced ContactAccessor.
  */
-class ContactAccessor : public IAccessor
+class ContactAccessor : public IContactAccessor
 {
 public:
    ContactAccessor(const std::shared_ptr<data::ContactStorage>& ps) : ps_(ps) {}
diff --git a/python/mesa_pd/templates/kernel/HCSITSRelaxationStep.templ.h b/python/mesa_pd/templates/kernel/HCSITSRelaxationStep.templ.h
new file mode 100644
index 0000000000000000000000000000000000000000..1523a57b348524dba5dd3ce1edba892cb19d912c
--- /dev/null
+++ b/python/mesa_pd/templates/kernel/HCSITSRelaxationStep.templ.h
@@ -0,0 +1,1049 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file HCSITSRelaxationStep.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/IContactAccessor.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+#include <core/math/Constants.h>
+#include <core/math/Limits.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+/**
+ * Apply a HCSITS (Hard-contact semi-implicit timestepping solver) relaxation to all contacts.
+ * Call this kernel on all bodies to perform on relaxation step of the solver.
+ * There exist different friction and relaxation models. See comments on their respective methods
+ * for an extended documentation. Use setRelaxationModel() to set it.
+ * getDeltaMax() can be used to query the maximum change in the impulses applied wrt to the last iteration.
+ *
+ * Use setMaxSubIterations() to set the maximum number of iterations performed to solve optimization problems arising
+ * during the relaxation (in InelasticGeneralizedMaximumDissipationContact and InelasticCoulombContactByDecoupling).
+ *
+ * Use setCor() to set the coefficient of restitution between 0 and 1 for the InelasticProjectedGaussSeidel model.
+ * All other models describe inelastic contacts.
+ *
+ * Example of Using the HCSITS kernels in a simulation:
+ * \code
+ *     // ps: Particle storage
+ *     // cs: Contact storage
+ *     // pa: Particle accessor
+ *     // ca: Contact accessor
+ *
+ *     // Detect contacts first and fill contact storage
+ *
+ *     mesa_pd::mpi::ReduceProperty reductionKernel;
+ *     mesa_pd::mpi::BroadcastProperty broadcastKernel;
+ *
+ *     kernel::IntegrateBodiesHCSITS integration;
+ *
+ *     kernel::InitContactsForHCSITS initContacts(1);
+ *     initContacts.setFriction(0,0,real_t(0.2));
+ *     kernel::InitBodiesForHCSITS initBodies;
+ *
+ *     kernel::HCSITSRelaxationStep relaxationStep;
+ *     relaxationStep.setCor(real_t(0.1)); // Only effective for PGSM
+ *
+ *     // Init Contacts and Bodies (order is arbitrary)
+ *     cs.forEachContact(false, kernel::SelectAll(), ca, initContacts, ca, pa);
+ *     ps.forEachParticle(false, kernel::SelectAll(), pa, initBodies, pa, dt);
+ *
+ *     // Reduce and Broadcast velocities with relaxation parameter 1 before the iteration
+ *     VelocityUpdateNotification::Parameters::relaxationParam = real_t(1.0);
+ *     reductionKernel.operator()<VelocityCorrectionNotification>(*ps);
+ *     broadcastKernel.operator()<VelocityUpdateNotification>(*ps);
+ *
+ *     VelocityUpdateNotification::Parameters::relaxationParam = real_t(0.8);
+ *     for(int j = 0; j < 10; j++){
+ *        cs.forEachContact(false, kernel::SelectAll(), ca, relaxationStep, ca, pa, dt);
+ *        reductionKernel.operator()<VelocityCorrectionNotification>(*ps);
+ *        broadcastKernel.operator()<VelocityUpdateNotification>(*ps);
+ *     }
+ *
+ *     ps.forEachParticle(false, kernel::SelectAll(), pa, integration, pa, dt);
+ * \endcode
+ * \ingroup mesa_pd_kernel
+ *
+ *
+ */
+class HCSITSRelaxationStep
+{
+public:
+   enum RelaxationModel {
+      InelasticFrictionlessContact,
+      ApproximateInelasticCoulombContactByDecoupling,
+      ApproximateInelasticCoulombContactByOrthogonalProjections,
+      InelasticCoulombContactByDecoupling,
+      InelasticCoulombContactByOrthogonalProjections,
+      InelasticGeneralizedMaximumDissipationContact,
+      InelasticProjectedGaussSeidel
+   };
+   
+   // Default constructor sets the default values
+   HCSITSRelaxationStep() :
+   {%- for prop in properties %}
+   {{prop.name}}_( {{prop.defValue}} ){{ "," if not loop.last}}
+   {%- endfor %}
+   {}
+
+   // Getter and Setter Functions
+   {%- for prop in properties %}
+   inline const {{prop.type}}& get{{prop.name | capFirst}}() const {return {{prop.name}}_;}
+   inline void set{{prop.name | capFirst}}({{prop.type}} v) { {{prop.name}}_ = v;}
+   {% endfor %}
+
+   /**
+    * Perform relaxation for a single contact.
+    * \param cid The index of the contact with the accessor ca.
+    * \param ca The contact accessor.
+    * \param pa The particle accessor.
+    * \param dt The timestep size used.
+    */
+   template <typename CAccessor, typename PAccessor>
+   void operator()(size_t cid, CAccessor &ca, PAccessor& pa,  real_t dt);
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticFrictionlessContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa);
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxApproximateInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa);
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa );
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticCoulombContactsByOrthogonalProjections(size_t cid, real_t dtinv, bool approximate, CAccessor &ca, PAccessor& pa );
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticGeneralizedMaximumDissipationContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa );
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticContactsByProjectedGaussSeidel(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa );
+
+private:
+   // List properties
+   {% for prop in properties %}
+   {{prop.type}} {{prop.name }}_;
+   {%- endfor %}
+};
+
+
+template <typename CAccessor, typename PAccessor>
+inline void HCSITSRelaxationStep::operator()(size_t cid, CAccessor &ca, PAccessor& pa,  real_t dt)
+{
+   static_assert(std::is_base_of<data::IContactAccessor, CAccessor>::value, "please provide a valid contact accessor");
+   static_assert(std::is_base_of<data::IAccessor, PAccessor>::value, "please provide a valid accessor");
+
+   real_t dtinv(real_t(1.0)/dt);
+   switch( getRelaxationModel() ) {
+      case InelasticFrictionlessContact:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticFrictionlessContacts(cid, dtinv, ca, pa )));
+         break;
+
+      case ApproximateInelasticCoulombContactByDecoupling:
+         setDeltaMax(std::max( getDeltaMax(), relaxApproximateInelasticCoulombContactsByDecoupling( cid, dtinv, ca, pa )));
+         break;
+
+      case ApproximateInelasticCoulombContactByOrthogonalProjections:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticCoulombContactsByOrthogonalProjections( cid, dtinv, true, ca, pa )));
+         break;
+
+      case InelasticCoulombContactByDecoupling:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticCoulombContactsByDecoupling( cid, dtinv, ca, pa )));
+         break;
+
+      case InelasticCoulombContactByOrthogonalProjections:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticCoulombContactsByOrthogonalProjections( cid, dtinv, false, ca, pa )));
+         break;
+
+      case InelasticGeneralizedMaximumDissipationContact:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticGeneralizedMaximumDissipationContacts( cid, dtinv, ca, pa )));
+         break;
+
+      case InelasticProjectedGaussSeidel:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticContactsByProjectedGaussSeidel( cid, dtinv, ca, pa )));
+         break;
+
+      default:
+         throw std::runtime_error( "Unsupported relaxation model." );
+}
+
+WALBERLA_LOG_DETAIL("Delta Max: " << getDeltaMax());
+
+}
+
+//*************************************************************************************************
+/*!\brief Relaxes The contact with ID cid once. The contact model is for inelastic unilateral contacts without friction.
+ *
+ * \param cid The index of the contact
+ * \param ca The contact accessor
+ * \param pa The particle accessor
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Separating contacts are preferred over
+ * persisting solutions if valid.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticFrictionlessContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa)
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting.
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      Vec3 p_wf( ca.getNormal(cid) * ( -ca.getDiag_n_inv(cid) * gdot_nto[0] ) );
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with approximate Coulomb friction.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Separating contacts are preferred over
+ * other solutions if valid. Static solutions are preferred over dynamic solutions. Dynamic
+ * solutions are computed by decoupling the normal from the frictional components. That is
+ * for a dynamic contact the normal component is relaxed first followed by the frictional
+ * components. The determination of the frictional components does not perform any subiterations
+ * and guarantees that the friction partially opposes slip.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxApproximateInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting (either static or dynamic).
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      //Vec3 p_cf( -( contactCache.diag_nto_inv_[i] * gdot_nto ) );
+      Vec3 p_cf( -( ca.getDiag_nto_inv(cid) * gdot_nto ) );
+
+      // Can p_cf[0] be negative even though -gdot_nto[0] > 0? Yes! Try:
+      // A = [0.5 -0.1 +0.1; -0.1 0.5 -0.1; +0.1 -0.1 1];
+      // b = [0.01 -1 -1]';
+      // A\b    \approx [-0.19 -2.28 -1.21]'
+      // eig(A) \approx [ 0.40  0.56  1.04]'
+
+      //real_t flimit( contactCache.mu_[i] * p_cf[0] );
+      real_t flimit( ca.getMu(cid) * p_cf[0] );
+      real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+      if( fsq > flimit * flimit || p_cf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // => Complementarity condition on normal reaction now turns into an equation since we know that the normal reaction is definitely not zero.
+
+         // For simplicity we change to a simpler relaxation scheme here:
+         // 1. Relax normal reaction with the tangential components equal to the previous values
+         // 2. Relax tangential components with the newly relaxed normal reaction
+         // Note: The better approach would be to solve the true 3x3 block problem!
+         // Warning: Simply projecting the frictional components is wrong since then the normal action is no longer 0 and simulations break.
+
+         // Add the action of the frictional reactions from the last iteration to the relative contact velocity in normal direction so we can relax it separately.
+         // TODO This can be simplified:
+         //p_cf = trans( contactframe ) * contactCache.p_[i];
+         //p_cf[0] = 0;
+         //p_[i] = contactframe * p_cf;
+
+
+         //Vec3 p_tmp = ( contactCache.t_[i] * contactCache.p_[i] ) * contactCache.t_[i] + ( contactCache.o_[i] * contactCache.p_[i] ) * contactCache.o_[i];
+         Vec3 p_tmp = ( ca.getT(cid) * ca.getP(cid)  ) * ca.getT(cid)  + ( ca.getO(cid) * ca.getP(cid) ) * ca.getO(cid);
+
+
+         //real_t gdot_n = gdot_nto[0] + contactCache.n_[i] * ( ( contactCache.body1_[i]->getInvInertia() * ( contactCache.r1_[i] % p_tmp ) ) % contactCache.r1_[i] + ( contactCache.body2_[i]->getInvInertia() * ( contactCache.r2_[i] % p_tmp ) ) % contactCache.r2_[i] /* + diag_[i] * p */ );
+         real_t gdot_n = gdot_nto[0] + ca.getNormal(cid) * ( ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % p_tmp ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2) * ( ca.getR2(cid) % p_tmp ) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+         //p_cf[0] = -( contactCache.diag_n_inv_[i] * gdot_n );
+         p_cf[0] = -( ca.getDiag_n_inv(cid) * gdot_n );
+
+         // We cannot be sure that gdot_n <= 0 here and thus p_cf[0] >= 0 since we just modified it with the old values of the tangential reactions! => Project
+         p_cf[0] = std::max( real_c( 0 ), p_cf[0] );
+
+         // Now add the action of the normal reaction to the relative contact velocity in the tangential directions so we can relax the frictional components separately.
+         p_tmp = ca.getNormal(cid)  * p_cf[0];
+         Vec3 gdot2 = gdot + ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % p_tmp ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2)* ( ca.getR2(cid) % p_tmp ) ) % ca.getR2(cid);
+         Vec2 gdot_to;
+         gdot_to[0] = ca.getT(cid) * gdot2;
+         gdot_to[1] = ca.getO(cid) * gdot2;
+
+         //Vec2 ret = -( contactCache.diag_to_inv_[i] * gdot_to );
+         Vec2 ret = -( ca.getDiag_to_inv(cid) * gdot_to );
+         p_cf[1] = ret[0];
+         p_cf[2] = ret[1];
+
+         flimit = ca.getMu(cid) * p_cf[0];
+         fsq = p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2];
+         if( fsq > flimit * flimit ) {
+            const real_t f( flimit / std::sqrt( fsq ) );
+            p_cf[1] *= f;
+            p_cf[2] *= f;
+         }
+      }
+      else {
+         // Contact is static.
+      }
+      Vec3 p_wf( contactframe * p_cf );
+
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with Coulomb friction.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Separating contacts are preferred over
+ * other solutions if valid. Static solutions are preferred over dynamic solutions. Dynamic
+ * solutions are computed by decoupling the normal from the frictional components. That is
+ * for a dynamic contact the normal component is relaxed first followed by the frictional
+ * components. How much the frictional components directly oppose slip as required by the Coulomb
+ * friction model depends on the number of subiterations performed. If no subiterations are
+ * performed the friction is guaranteed to be at least partially dissipative.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+   //WALBERLA_LOG_WARNING( "Contact #" << i << " is\nA = \n" << contactCache.diag_nto_[i] << "\nb = \n" << gdot_nto << "\nmu = " << contactCache.mu_[i] );
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+      //WALBERLA_LOG_WARNING( "Contact #" << i << " is separating." );
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting (either static or dynamic).
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      Vec3 p_cf( -( ca.getDiag_nto_inv(cid) * gdot_nto ) );
+
+      // Can p_cf[0] be negative even though -gdot_nto[0] > 0? Yes! Try:
+      // A = [0.5 -0.1 +0.1; -0.1 0.5 -0.1; +0.1 -0.1 1];
+      // b = [0.01 -1 -1]';
+      // A\b    \approx [-0.19 -2.28 -1.21]'
+      // eig(A) \approx [ 0.40  0.56  1.04]'
+
+      real_t flimit( ca.getMu(cid) * p_cf[0] );
+      real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+      if( fsq > flimit * flimit || p_cf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // => Complementarity condition on normal reaction now turns into an equation since we know that the normal reaction is definitely not zero.
+
+         for (int j = 0; j < 20; ++j) {
+            // For simplicity we change to a simpler relaxation scheme here:
+            // 1. Relax normal reaction with the tangential components equal to the previous values
+            // 2. Relax tangential components with the newly relaxed normal reaction
+            // Note: The better approach would be to solve the true 3x3 block problem!
+            // Warning: Simply projecting the frictional components is wrong since then the normal action is no longer 0 and simulations break.
+
+            Vec3 gdotCorrected;
+            real_t gdotCorrected_n;
+            Vec2 gdotCorrected_to;
+
+            // Calculate the relative contact velocity in the global world frame (if no normal contact reaction is present at contact i)
+            p_cf[0] = 0;
+            //                       |<- p_cf is orthogonal to the normal and drops out in next line ->|
+            gdotCorrected   = /* ( contactCache.body1_[i]->getInvMass() + contactCache.body2_[i]->getInvMass() ) * p_cf  */ gdot + ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % ( ca.getT(cid) * p_cf[1] + ca.getO(cid) * p_cf[2] ) ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2) * ( ca.getR2(cid) % ( ca.getT(cid) * p_cf[1] + ca.getO(cid) * p_cf[2] ) ) ) % ca.getR2(cid);
+            gdotCorrected_n = ca.getNormal(cid) * gdotCorrected + ca.getDistance(cid) * dtinv;
+
+            // Relax normal component.
+            p_cf[0] = std::max( real_c( 0 ), -( ca.getDiag_n_inv(cid) * gdotCorrected_n ) );
+
+            // Calculate the relative contact velocity in the global world frame (if no frictional contact reaction is present at contact i)
+            p_cf[1] = p_cf[2] = real_c( 0 );
+            //                       |<- p_cf is orthogonal to the tangential plane and drops out   ->|
+            gdotCorrected   = /* ( contactCache.body1_[i]->getInvMass() + contactCache.body2_[i]->getInvMass() ) * p_cf */ gdot + ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % ( ca.getNormal(cid) * p_cf[0] ) ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2) * ( ca.getR2(cid) % ( ca.getNormal(cid) * p_cf[0] ) ) ) % ca.getR2(cid);
+            gdotCorrected_to[0] = ca.getT(cid) * gdotCorrected;
+            gdotCorrected_to[1] = ca.getO(cid) * gdotCorrected;
+
+            // Relax frictional components.
+            Vec2 ret = -( ca.getDiag_to_inv(cid) * gdotCorrected_to );
+            p_cf[1] = ret[0];
+            p_cf[2] = ret[1];
+
+            flimit = ca.getMu(cid) * p_cf[0];
+            fsq = p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2];
+            if( fsq > flimit * flimit ) {
+               // 3.2.1 Decoupling
+               // \tilde{x}^0 = p_cf[1..2]
+
+               // Determine \tilde{A}
+               Mat2 diag_to( ca.getDiag_nto(cid)(1, 1), ca.getDiag_nto(cid)(1, 2), ca.getDiag_nto(cid)(2, 1), ca.getDiag_nto(cid)(2, 2) );
+
+               const real_t f( flimit / std::sqrt( fsq ) );
+               //p_cf[1] *= f;
+               //p_cf[2] *= f;
+
+               // Determine search interval for Golden Section Search
+               const real_t phi( real_c(0.5) * ( real_c(1) + std::sqrt( real_c( 5 ) ) ) );
+               real_t shift( std::atan2( -p_cf[2], p_cf[1] ) );
+               real_t acos_f( std::acos( f ) );
+
+               //WALBERLA_LOG_WARNING( acos_f << " " << shift );
+
+               real_t alpha_left( -acos_f - shift );
+               //Vec2 x_left( flimit * std::cos( alpha_left ), flimit * std::sin( alpha_left ) );
+               //real_t f_left( 0.5 * trans( x_left ) * ( diag_to * x_left ) - trans( x_left ) * ( -gdot_to ) );
+
+               real_t alpha_right( acos_f - shift );
+               //Vec2 x_right( flimit * std::cos( alpha_right ), flimit * std::sin( alpha_right ) );
+               //real_t f_right( 0.5 * trans( x_right ) * ( diag_to * x_right ) - trans( x_right ) * ( -gdot_to ) );
+
+               real_t alpha_mid( ( alpha_right + alpha_left * phi ) / ( 1 + phi ) );
+               Vec2 x_mid( flimit * std::cos( alpha_mid ), flimit * std::sin( alpha_mid ) );
+               real_t f_mid( real_c(0.5) * x_mid * ( diag_to * x_mid ) - x_mid * ( -gdotCorrected_to ) );
+
+               bool leftlarger = false;
+               for( size_t k = 0; k < getMaxSubIterations(); ++k ) {
+                  real_t alpha_next( alpha_left + ( alpha_right - alpha_mid ) );
+                  Vec2 x_next( flimit * std::cos( alpha_next ), flimit * std::sin( alpha_next ) );
+                  real_t f_next( real_c(0.5) * x_next * ( diag_to * x_next ) - x_next * ( -gdotCorrected_to ) );
+                  //WALBERLA_LOG_WARNING( "[(" << alpha_left << ", ?); (" << alpha_mid << ", " << f_mid << "); (" << alpha_right << ", ?)] <- (" << alpha_next << ", " << f_next << ")" );
+                  //WALBERLA_LOG_WARNING( "left: " << alpha_mid - alpha_left << "  right: " << alpha_right - alpha_mid << "  ll: " << leftlarger );
+                  //WALBERLA_ASSERT(leftlarger ? (alpha_mid - alpha_left > alpha_right - alpha_mid) : (alpha_mid - alpha_left < alpha_right - alpha_mid), "ll inconsistent!" );
+
+                  if (leftlarger) {
+                     // left interval larger
+                     if( f_next < f_mid ) {
+                        alpha_right = alpha_mid;
+                        alpha_mid   = alpha_next;
+                        x_mid       = x_next;
+                        f_mid       = f_next;
+                        leftlarger = true;
+                     }
+                     else {
+                        alpha_left  = alpha_next;
+                        leftlarger = false;
+                     }
+                  }
+                  else {
+                     // right interval larger
+                     if( f_next < f_mid ) {
+                        alpha_left = alpha_mid;
+                        alpha_mid  = alpha_next;
+                        x_mid      = x_next;
+                        f_mid      = f_next;
+                        leftlarger = false;
+                     }
+                     else {
+                        alpha_right = alpha_next;
+                        leftlarger = true;
+                     }
+                  }
+               }
+               //WALBERLA_LOG_WARNING( "dalpha = " << alpha_right - alpha_left );
+
+               p_cf[1] = x_mid[0];
+               p_cf[2] = x_mid[1];
+            }
+         }
+         //WALBERLA_LOG_WARNING( "Contact #" << i << " is dynamic." );
+      }
+      else {
+         // Contact is static.
+         //WALBERLA_LOG_WARNING( "Contact #" << i << " is static." );
+      }
+
+      //WALBERLA_LOG_WARNING( "Contact reaction in contact frame: " << p_cf << "\n" << ca.getDiag_nto(cid)*p_cf + gdot_nto );
+      Vec3 p_wf( contactframe * p_cf );
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with Coulomb friction.
+ *
+ * \param dtinv The inverse of the current time step.
+ * \param approximate Use the approximate model showing bouncing.
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). The iterative method to solve the contact
+ * problem is e.g. described in the article "A matrix-free cone complementarity approach for
+ * solving large-scale, nonsmooth, rigid body dynamics" by A. Tasora and M. Anitescu in Computer
+ * Methods in Applied Mechanics and Engineering (Volume 200, Issues 5–8, 15 January 2011,
+ * Pages 439-453). The contact model is purely quadratic and convergence should be good but depends
+ * on a parameter. The one-contact problem has a unique solution. The frictional reactions
+ * for a dynamic contact converge to those that directly oppose slip. However, the contact is
+ * not perfectly inelastic for dynamic contacts but bounces. These vertical motions tend to
+ * go to zero for smaller time steps and can be interpreted as exaggerated vertical motions
+ * coming from micro asperities (see "Optimization-based simulation of nonsmooth rigid multibody
+ * dynamics" by M. Anitescu in Mathematical Programming (Volume 105, Issue 1, January 2006, Pages
+ * 113-143). These motions can be prevented by a small change in the iteration proposed in "The
+ * bipotential method: a constructive approach to design the complete contact law with friction and
+ * improved numerical algorithms" by G. De Saxce and Z-Q. Feng in Mathematical and Computer
+ * Modelling (Volume 28, Issue 4, 1998, Pages 225-245). Which iteration is used is controlled with
+ * the approximate parameter.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticCoulombContactsByOrthogonalProjections(size_t cid, real_t dtinv, bool approximate, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+
+   const real_t w( 1 ); // w > 0
+   Vec3 p_cf( contactframe.getTranspose() * ca.getP(cid) );
+   if( approximate ) {
+      // Calculate next iterate (Anitescu/Tasora).
+      p_cf = p_cf - w * ( ca.getDiag_nto(cid) * p_cf + gdot_nto );
+   }
+   else {
+      // Calculate next iterate (De Saxce/Feng).
+      Vec3 tmp( ca.getDiag_nto(cid) * p_cf + gdot_nto );
+      tmp[0] += std::sqrt( math::sq( tmp[1] ) + math::sq( tmp[2] ) ) * ca.getMu(cid);
+      p_cf = p_cf - w * tmp;
+   }
+
+   // Project.
+   real_t flimit( ca.getMu(cid) * p_cf[0] );
+   real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+   if( p_cf[0] > 0 && fsq < flimit * flimit ) {
+      // Unconstrained minimum is in cone leading to a static contact and no projection
+      // is necessary.
+   }
+   else if( p_cf[0] < 0 && fsq < math::sq( p_cf[0] )/ math::sq( ca.getMu(cid) ) ) {
+      // Unconstrained minimum is in dual cone leading to a separating contact where no contact
+      // reaction is present (the unconstrained minimum is projected to the tip of the cone).
+      p_cf = Vec3();
+   }
+   else {
+      // The contact is dynamic.
+      real_t f( std::sqrt( fsq ) );
+      p_cf[0] = ( f * ca.getMu(cid) + p_cf[0] ) / ( math::sq( ca.getMu(cid) ) + 1 );
+      real_t factor( ca.getMu(cid) * p_cf[0] / f );
+      p_cf[1] *= factor;
+      p_cf[2] *= factor;
+   }
+
+   Vec3 p_wf( contactframe * p_cf );
+   Vec3 dp( ca.getP(cid) - p_wf );
+   delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+   ca.getPRef(cid) = p_wf;
+
+   // Apply impulse right away.
+   pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with the generalized maximum dissipation principle for friction.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Dynamic solutions are computed by
+ * minimizing the kinetic energy along the intersection of the plane of maximum compression and
+ * the friction cone.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticGeneralizedMaximumDissipationContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting (either static or dynamic).
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      Vec3 p_cf( -( ca.getDiag_nto_inv(cid) * gdot_nto ) );
+
+      // Can p_cf[0] be negative even though -gdot_nto[0] > 0? Yes! Try:
+      // A = [0.5 -0.1 +0.1; -0.1 0.5 -0.1; +0.1 -0.1 1];
+      // b = [0.01 -1 -1]';
+      // A\b    \approx [-0.19 -2.28 -1.21]'
+      // eig(A) \approx [ 0.40  0.56  1.04]'
+
+      real_t flimit( ca.getMu(cid) * p_cf[0] );
+      real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+      if( fsq > flimit * flimit || p_cf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // => Complementarity condition on normal reaction now turns into an equation since we know that the normal reaction is definitely not zero.
+
+         // \breve{x}^0 = p_cf[1..2]
+
+         // Eliminate normal component from 3x3 system: ca.getDiag_nto(cid)*p_cf + gdot_nto => \breve{A} \breve{x} - \breve{b}
+         const real_t invA_nn( math::inv( ca.getDiag_nto(cid)(0, 0) ) );
+         const real_t offdiag( ca.getDiag_nto(cid)(1, 2) - invA_nn * ca.getDiag_nto(cid)(0, 1) * ca.getDiag_nto(cid)(0, 2) );
+         Mat2 A_breve( ca.getDiag_nto(cid)(1, 1) - invA_nn *math::sq( ca.getDiag_nto(cid)(0, 1) ), offdiag, offdiag, ca.getDiag_nto(cid)(2, 2) - invA_nn *math::sq( ca.getDiag_nto(cid)(0, 2) ) );
+         Vec2 b_breve( -gdot_nto[1] + invA_nn * ca.getDiag_nto(cid)(0, 1) * gdot_nto[0], -gdot_nto[2] + invA_nn * ca.getDiag_nto(cid)(0, 2) * gdot_nto[0] );
+
+         const real_t shiftI( std::atan2( -ca.getDiag_nto(cid)(0, 2), ca.getDiag_nto(cid)(0, 1) ) );
+         const real_t shiftJ( std::atan2( -p_cf[2], p_cf[1] ) );
+         const real_t a3( std::sqrt(math::sq( ca.getDiag_nto(cid)(0, 1) ) +math::sq( ca.getDiag_nto(cid)(0, 2) ) ) );
+         const real_t fractionI( -ca.getDiag_nto(cid)(0, 0) / ( ca.getMu(cid) * a3 ) );
+         const real_t fractionJ( std::min( invA_nn * ca.getMu(cid) * ( ( -gdot_nto[0] ) / std::sqrt( fsq ) - a3 * std::cos( shiftI - shiftJ ) ), real_c( 1 ) ) );
+
+         // Search interval determination.
+         real_t alpha_left, alpha_right;
+         if( fractionJ < -1 ) {
+            // J is complete
+            const real_t angleI( std::acos( fractionI ) );
+            alpha_left = -angleI - shiftI;
+            alpha_right = +angleI - shiftI;
+            if( alpha_left < 0 ) {
+               alpha_left += 2 * math::pi;
+               alpha_right += 2 * math::pi;
+            }
+         }
+         else if( ca.getDiag_nto(cid)(0, 0) > ca.getMu(cid) * a3 ) {
+            // I is complete
+            const real_t angleJ( std::acos( fractionJ ) );
+            alpha_left = -angleJ - shiftJ;
+            alpha_right = +angleJ - shiftJ;
+            if( alpha_left < 0 ) {
+               alpha_left += 2 * math::pi;
+               alpha_right += 2 * math::pi;
+            }
+         }
+         else {
+            // neither I nor J is complete
+            const real_t angleJ( std::acos( fractionJ ) );
+            real_t alpha1_left( -angleJ - shiftJ );
+            real_t alpha1_right( +angleJ - shiftJ );
+            if( alpha1_left < 0 ) {
+               alpha1_left += 2 * math::pi;
+               alpha1_right += 2 * math::pi;
+            }
+            const real_t angleI( std::acos( fractionI ) );
+            real_t alpha2_left( -angleI - shiftI );
+            real_t alpha2_right( +angleI - shiftI );
+            if( alpha2_left < 0 ) {
+               alpha2_left += 2 * math::pi;
+               alpha2_right += 2 * math::pi;
+            }
+
+            // Swap intervals if second interval does not start right of the first interval.
+            if( alpha1_left > alpha2_left ) {
+               std::swap( alpha1_left, alpha2_left );
+               std::swap( alpha1_right, alpha2_right );
+            }
+
+            if( alpha2_left > alpha1_right ) {
+               alpha2_right -= 2*math::pi;
+               if( alpha2_right > alpha1_right ) {
+                  // [alpha1_left; alpha1_right] \subset [alpha2_left; alpha2_right]
+               }
+               else {
+                  // [alpha2_left; alpha2_right] intersects the left end of [alpha1_left; alpha1_right]
+                  alpha1_right = alpha2_right;
+               }
+            }
+            else {
+               alpha1_left = alpha2_left;
+               if( alpha2_right > alpha1_right ) {
+                  // [alpha2_left; alpha2_right] intersects the right end of [alpha1_left; alpha1_right]
+               }
+               else {
+                  // [alpha2_left; alpha2_right] \subset [alpha1_left; alpha1_right]
+                  alpha1_right = alpha2_right;
+               }
+            }
+
+            alpha_left = alpha1_left;
+            alpha_right = alpha1_right;
+         }
+
+         const real_t phi( real_c(0.5) * ( real_c(1) + std::sqrt( real_c( 5 ) ) ) );
+         real_t alpha_mid( ( alpha_right + alpha_left * phi ) / ( 1 + phi ) );
+         Vec2 x_mid;
+         real_t f_mid;
+
+         {
+            real_t r_ub = ca.getMu(cid) * ( -gdot_nto[0] ) / ( ca.getDiag_nto(cid)(0, 0) + ca.getMu(cid) * a3 * std::cos( alpha_mid + shiftI ) );
+            if( r_ub < 0 )
+               r_ub = math::Limits<real_t>::inf();
+            x_mid = Vec2( r_ub * std::cos( alpha_mid ), r_ub * std::sin( alpha_mid ) );
+            f_mid = real_c(0.5) * x_mid * ( A_breve * x_mid ) - x_mid * b_breve;
+         }
+
+         bool leftlarger = false;
+         for( size_t k = 0; k < maxSubIterations_; ++k ) {
+            real_t alpha_next( alpha_left + ( alpha_right - alpha_mid ) );
+            real_t r_ub = ca.getMu(cid) * ( -gdot_nto[0] ) / ( ca.getDiag_nto(cid)(0, 0) + ca.getMu(cid) * a3 * std::cos( alpha_next + shiftI ) );
+            if( r_ub < 0 )
+               r_ub = math::Limits<real_t>::inf();
+            Vec2 x_next( r_ub * std::cos( alpha_next ), r_ub * std::sin( alpha_next ) );
+            real_t f_next( real_c(0.5) * x_next * ( A_breve * x_next ) - x_next * b_breve );
+
+            //WALBERLA_LOG_WARNING( "[(" << alpha_left << ", ?); (" << alpha_mid << ", " << f_mid << "); (" << alpha_right << ", ?)] <- (" << alpha_next << ", " << f_next << ")" );
+            //WALBERLA_LOG_WARNING( "left: " << alpha_mid - alpha_left << "  right: " << alpha_right - alpha_mid << "  ll: " << leftlarger );
+            //WALBERLA_ASSERT(leftlarger ? (alpha_mid - alpha_left > alpha_right - alpha_mid) : (alpha_mid - alpha_left < alpha_right - alpha_mid), "ll inconsistent!" );
+
+            if (leftlarger) {
+               // left interval larger
+               if( f_next < f_mid ) {
+                  alpha_right = alpha_mid;
+                  alpha_mid   = alpha_next;
+                  x_mid       = x_next;
+                  f_mid       = f_next;
+                  leftlarger = true;
+               }
+               else {
+                  alpha_left  = alpha_next;
+                  leftlarger = false;
+               }
+            }
+            else {
+               // right interval larger
+               if( f_next < f_mid ) {
+                  alpha_left = alpha_mid;
+                  alpha_mid  = alpha_next;
+                  x_mid      = x_next;
+                  f_mid      = f_next;
+                  leftlarger = false;
+               }
+               else {
+                  alpha_right = alpha_next;
+                  leftlarger = true;
+               }
+            }
+         }
+         //WALBERLA_LOG_DETAIL( "dalpha = " << alpha_right - alpha_left << "\n");
+         {
+            real_t alpha_init( std::atan2( p_cf[2], p_cf[1] ) );
+            real_t r_ub = ca.getMu(cid) * ( -gdot_nto[0] ) / ( ca.getDiag_nto(cid)(0, 0) + ca.getMu(cid) * a3 * std::cos( alpha_init + shiftI ) );
+            if( r_ub < 0 )
+               r_ub = math::Limits<real_t>::inf();
+            Vec2 x_init( r_ub * std::cos( alpha_init ), r_ub * std::sin( alpha_init ) );
+            real_t f_init( real_c(0.5) * x_init * ( A_breve * x_init ) - x_init * b_breve );
+
+            if( f_init < f_mid )
+            {
+               x_mid = x_init;
+               WALBERLA_LOG_DETAIL( "Replacing solution by primitive dissipative solution (" << f_init << " < " << f_mid << " at " << alpha_init << " vs. " << alpha_mid << ").\n");
+            }
+         }
+
+         p_cf[0] = invA_nn * ( -gdot_nto[0] - ca.getDiag_nto(cid)(0, 1) * x_mid[0] - ca.getDiag_nto(cid)(0, 2) * x_mid[1] );
+         p_cf[1] = x_mid[0];
+         p_cf[2] = x_mid[1];
+         //WALBERLA_LOG_DETAIL( "Contact #" << i << " is dynamic." );
+      }
+      else {
+         // Contact is static.
+         //WALBERLA_LOG_DETAIL( "Contact #" << i << " is static." );
+      }
+      Vec3 p_wf( contactframe * p_cf );
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+      //WALBERLA_LOG_DETAIL( "Contact reaction in contact frame: " << p_cf << "\nContact action in contact frame: " << ca.getDiag_nto(cid)*p_cf + gdot_nto );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+   return delta_max;
+}
+//*************************************************************************************************
+
+//*************************************************************************************************
+//*************************************************************************************************
+/*!\brief Relaxes all contacts once. The contact model is from the paper by Tschisgale et al.
+ * "A constraint-based collision model for Cosserat rods" and works with a coefficient of
+ * restitution cor with 0 < cor <= 1.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticContactsByProjectedGaussSeidel(size_t cid, real_t /*dtinv*/, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   // This is the negative contact velocity as for the other methods
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) -
+                  ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) +
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) -
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) /* + diag_[i] * p */ );
+
+   // Use restitution hypothesis
+   gdot = gdot + getCor()*(gdot*ca.getNormal(cid))*ca.getNormal(cid);
+
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+
+   //
+   // Turn system matrix back to the world frame.
+   Mat3 K = contactframe * ca.getDiag_nto(cid) *  contactframe.getTranspose();
+   Mat3 Kinv = contactframe * ca.getDiag_nto_inv(cid) * contactframe.getTranspose();
+
+   // Compute impulse necessary in the world frame
+   Vec3 p_wf( - Kinv * gdot );
+
+
+
+   if( gdot * ca.getNormal(cid) <= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+      // No need to apply zero impulse.
+   }
+   else {
+      // Dissect the impuls in a tangetial and normal directions
+      // Use the inverted contact frame with -n as in the publication
+      Mat3 reversedContactFrame( -ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+      Vec3 p_rcf( reversedContactFrame.getTranspose() * p_wf );
+
+      // Check the frictional limit
+      real_t flimit( ca.getMu(cid) * p_rcf[0] );
+
+      // Square of tangential components of the impulse
+      real_t fsq( p_rcf[1] * p_rcf[1] + p_rcf[2] * p_rcf[2] );
+
+      if( fsq > flimit * flimit || p_rcf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // Calculate corrected contact reaction by the method of Tschigale et al.
+         // Normal component is close to 0 or negative, and we cannot asses a tangential direction
+         // Treat this case with the 0 impulse
+         if (fsq < real_t(1e-8)) {
+            delta_max = std::max(delta_max, std::max(std::abs(ca.getP(cid)[0]),
+                                                     std::max(std::abs(ca.getP(cid)[1]), std::abs(ca.getP(cid)[2]))));
+            ca.getPRef(cid) = Vec3();
+         } else {
+            // tangential direction of sliding
+            Vec3 tan_dir = ((p_rcf[1] * ca.getT(cid)) + (p_rcf[2] * ca.getO(cid))).getNormalized();
+            // scalar magnitude of pv.
+            real_t abspv = ((K * p_wf) * ca.getNormal(cid)) /
+                           ((K * (-ca.getNormal(cid) + ca.getMu(cid) * tan_dir)) * ca.getNormal(cid));
+            p_wf = abspv * (-ca.getNormal(cid) + ca.getMu(cid) * tan_dir);
+
+         }
+      }
+      // Calculate variation
+      Vec3 dp(ca.getP(cid) - p_wf);
+      delta_max = std::max(delta_max, std::max(std::abs(dp[0]), std::max(std::abs(dp[1]), std::abs(dp[2]))));
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/python/mesa_pd/templates/kernel/InitContactsForHCSITS.templ.h b/python/mesa_pd/templates/kernel/InitContactsForHCSITS.templ.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb48c9a5199d5b54d50f2a52d14ac68f63c74a34
--- /dev/null
+++ b/python/mesa_pd/templates/kernel/InitContactsForHCSITS.templ.h
@@ -0,0 +1,180 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitContactsForHCSITS.h
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/IContactAccessor.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+* Init the datastructures for a contact for later use of the HCSITS-Solver.
+* Call this kernel on all contacts that you want to treat with HCSITS before performing any relaxation timesteps.
+* Use setErp() to set the error reduction parameter, which defines the share of the overlap that is resolved in one step
+* (0 meaning after the relaxation the overlap is the same, 1 meaning the particles will have no overlap after relaxation).
+* Use setFriction(a,b, cof) to define the coefficient of friction cof between materials a and b. It is assumed to be
+ * symmetric w.r.t. the materials.
+* \ingroup mesa_pd_kernel
+*/
+class InitContactsForHCSITS{
+public:
+
+   // Default constructor sets the default values
+   explicit InitContactsForHCSITS(const uint_t numParticleTypes) :
+   numParticleTypes_ (numParticleTypes),
+   {%- for prop in properties %}
+   {{prop.name}}_( {{prop.defValue}} ){{ "," if not loop.last}}
+   {%- endfor %}
+   {
+
+      {% for param in material_parameters %}
+      {{param}}_.resize(numParticleTypes * numParticleTypes, real_t(0));
+      {%- endfor %}
+   }
+
+   // Getter and Setter Functions
+   {%- for prop in properties %}
+   inline const {{prop.type}}& get{{prop.name | capFirst}}() const {return {{prop.name}}_;}
+   inline void set{{prop.name | capFirst}}({{prop.type}} v) { {{prop.name}}_ = v;}
+   {% endfor %}
+
+   // Getter and Setter Functions for material parameter combinations (they are assumed symmetric).
+   {% for param in material_parameters %}
+   inline void set{{param | capFirst}}(const size_t type1, const size_t type2, const real_t& val)
+   {
+      WALBERLA_ASSERT_LESS( type1, numParticleTypes_ );
+      WALBERLA_ASSERT_LESS( type2, numParticleTypes_ );
+      {{param}}_[numParticleTypes_*type1 + type2] = val;
+      {{param}}_[numParticleTypes_*type2 + type1] = val;
+   }
+   {%- endfor %}
+
+   {% for param in material_parameters %}
+   inline real_t get{{param | capFirst}}(const size_t type1, const size_t type2) const
+   {
+      WALBERLA_ASSERT_LESS( type1, numParticleTypes_ );
+      WALBERLA_ASSERT_LESS( type2, numParticleTypes_ );
+      WALBERLA_ASSERT_FLOAT_EQUAL( {{param}}_[numParticleTypes_*type1 + type2],
+              {{param}}_[numParticleTypes_*type2 + type1],
+              "parameter matrix for {{param}} not symmetric!");
+      return {{param}}_[numParticleTypes_*type1 + type2];
+   }
+   {%- endfor %}
+
+   // List material parameters
+   {% for param in material_parameters %}
+   std::vector<real_t> {{param}}_ {};
+   {%- endfor %}
+
+   template<typename CAccessor, typename PAccessor>
+   void operator()(size_t c, CAccessor &ca, PAccessor &pa);
+
+private:
+   // List properties
+   const uint_t numParticleTypes_;
+
+   {% for prop in properties %}
+   {{prop.type}} {{prop.name }}_;
+   {%- endfor %}
+
+   {% for param in parameters %}
+   std::vector<real_t> {{param}}_ {};
+   {%- endfor %}
+
+};
+
+
+template<typename CAccessor, typename PAccessor>
+inline void InitContactsForHCSITS::operator()(size_t c, CAccessor &ca, PAccessor &pa) {
+
+      static_assert(std::is_base_of<data::IContactAccessor, CAccessor>::value, "please provide a valid contact accessor");
+      static_assert(std::is_base_of<data::IAccessor, PAccessor>::value, "please provide a valid particle accessor");
+
+   size_t bId1 = ca.getId1(c);
+   size_t bId2 = ca.getId2(c);
+   ca.setR1(c, ca.getPosition(c) - pa.getPosition(bId1));
+   ca.setR2(c, ca.getPosition(c) - pa.getPosition(bId2));
+
+
+   // construct vector perpendicular to the normal (cross product with cardinal basis vector where the 1 component is where the other vector has its component of smallest magnitude)
+   if (std::fabs(ca.getNormalRef(c)[0]) < std::fabs(ca.getNormalRef(c)[1])) {
+      if (std::fabs(ca.getNormalRef(c)[0]) < std::fabs(ca.getNormalRef(c)[2]))
+         ca.setT(c, Vec3(0, ca.getNormalRef(c)[2], -ca.getNormalRef(c)[1]));   // = n x (1, 0, 0)^T
+      else
+         ca.setT(c, Vec3(ca.getNormalRef(c)[1], -ca.getNormalRef(c)[0], 0));   // = n x (0, 0, 1)^T
+   } else {
+      if (std::fabs(ca.getNormalRef(c)[1]) < std::fabs(ca.getNormalRef(c)[2]))
+         ca.setT(c, Vec3(-ca.getNormalRef(c)[2], 0, ca.getNormalRef(c)[0]));   // = n x (0, 1, 0)^T
+      else
+         ca.setT(c, Vec3(ca.getNormalRef(c)[1], -ca.getNormalRef(c)[0], 0));   // = n x (0, 0, 1)^T
+   }
+   normalize(ca.getTRef(c));
+   ca.setO(c, ca.getNormal(c) % ca.getT(c));
+
+   Mat3 contactframe(ca.getNormal(c), ca.getT(c), ca.getO(c));
+
+   // If the distance is negative then penetration is present. This is an error and should be corrected.
+   // Correcting the whole error is not recommended since due to the linearization the errors cannot
+   // completely fixed anyway and the error reduction will introduce artificial restitution.
+   // However, if the distance is positive then it is not about error correction but the distance that
+   // can still be overcome without penetration and thus no error correction parameter should be applied.
+   if (ca.getDistance(c) < real_t(0.0)) {
+      setMaximumPenetration(std::max(getMaximumPenetration(), -ca.getDistance(c)));
+      ca.getDistanceRef(c) *= erp_;
+   }
+
+   ca.getMuRef(c) = getFriction(pa.getType(bId1), pa.getType(bId2));
+
+   Mat3 diag = -(
+           math::skewSymCrossProduct(ca.getR1(c),
+                                     math::skewSymCrossProduct(pa.getInvInertia(bId1), ca.getR1(c)))
+           + math::skewSymCrossProduct(ca.getR2(c),
+                                       math::skewSymCrossProduct(pa.getInvInertia(bId2), ca.getR2(c))));
+   diag[0] += pa.getInvMass(bId1) + pa.getInvMass(bId2);
+   diag[4] += pa.getInvMass(bId1) + pa.getInvMass(bId2);
+   diag[8] += pa.getInvMass(bId1) + pa.getInvMass(bId2);
+
+   diag = contactframe.getTranspose() * diag * contactframe;
+
+   // Diagonal block is known to be positive-definite and thus an inverse always exists.
+   ca.getDiag_ntoRef(c) = diag;
+   ca.getDiag_nto_invRef(c) = diag.getInverse();
+   ca.getDiag_n_invRef(c) = math::inv(diag[0]);
+   ca.getDiag_to_invRef(c) = Mat2(diag[4], diag[5], diag[7], diag[8]).getInverse();
+}
+
+}
+}
+}
diff --git a/python/mesa_pd/templates/kernel/InitParticlesForHCSITS.templ.h b/python/mesa_pd/templates/kernel/InitParticlesForHCSITS.templ.h
new file mode 100644
index 0000000000000000000000000000000000000000..1ef8698a49004622e5ddda143d07b70bc53edce3
--- /dev/null
+++ b/python/mesa_pd/templates/kernel/InitParticlesForHCSITS.templ.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitParticlesForHCSITS.h
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+/**
+ * Init the datastructures for the particles for later use of the HCSITS-Solver.
+ * Call this kernel on all particles that will be treated with HCSITS before performing any relaxation timesteps.
+ * Use setGlobalAcceleration() to set an accelleration action uniformly across all particles (e.g. gravity)
+ * \ingroup mesa_pd_kernel
+ */
+class InitParticlesForHCSITS
+{
+public:
+   // Default constructor sets the default values
+   InitParticlesForHCSITS() :
+   {%- for prop in properties %}
+   {{prop.name}}_( {{prop.defValue}} ){{ "," if not loop.last}}
+   {%- endfor %}
+   {}
+
+   // Getter and Setter Functions for properties
+   {%- for prop in properties %}
+   inline const {{prop.type}}& get{{prop.name | capFirst}}() const {return {{prop.name}}_;}
+   inline void set{{prop.name | capFirst}}({{prop.type}} v) { {{prop.name}}_ = v;}
+   {% endfor %}
+
+   template <typename Accessor>
+   void operator()(size_t j, Accessor& ac, real_t dt);
+
+   template <typename Accessor>
+   void initializeVelocityCorrections(Accessor& ac, size_t body, Vec3& dv, Vec3& dw, real_t dt ) const;
+
+private:
+   // List properties
+   {% for prop in properties %}
+   {{prop.type}} {{prop.name }}_;
+   {%- endfor %}
+
+};
+
+
+template <typename Accessor>
+inline void InitParticlesForHCSITS::operator()(size_t j, Accessor& ac, real_t dt)
+{
+   using namespace data::particle_flags;
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+   auto particle_flags = ac.getFlagsRef(j);
+   if (isSet(particle_flags, GLOBAL)){
+      WALBERLA_CHECK( isSet(particle_flags, FIXED), "Global bodies need to have infinite mass as they are not communicated!" );
+      initializeVelocityCorrections( ac, j, ac.getDvRef(j), ac.getDwRef(j), dt ); // use applied external forces to calculate starting velocity
+   }else{
+      initializeVelocityCorrections( ac, j, ac.getDvRef(j), ac.getDwRef(j), dt ); // use applied external forces to calculate starting velocity
+      if(!isSet(particle_flags, FIXED)){ // Update velocities with global acceleration and angular velocity with euler eqn
+         ac.getLinearVelocityRef(j) = ac.getLinearVelocity(j) + getGlobalAcceleration() * dt;
+         ac.getAngularVelocityRef(j) = ac.getAngularVelocity(j) + dt * ( ac.getInvInertia(j) * ( ( ac.getInertia(j) * ac.getAngularVelocity(j) ) % ac.getAngularVelocity(j) ) );
+      }
+   }
+}
+
+
+//*************************************************************************************************
+/*!\brief Calculates the initial velocity corrections of a given body.
+ * \param ac The particle accessor
+ * \param body The body whose velocities to time integrate
+ * \param dv On return the initial linear velocity correction.
+ * \param w On return the initial angular velocity correction.
+ * \param dt The time step size.
+ * \return void
+ *
+ * Calculates the velocity corrections effected by external forces and torques in an explicit Euler
+ * time integration of the velocities of the given body. For fixed objects the velocity corrections
+ * are set to zero. External forces and torques are reset if indicated by the settings.
+ */
+template <typename Accessor>
+inline void InitParticlesForHCSITS::initializeVelocityCorrections(Accessor& ac, size_t body, Vec3& dv, Vec3& dw, real_t dt ) const
+{
+   dv = ( ac.getInvMass(body) * dt ) * ac.getForce(body);
+   dw = dt * ( ac.getInvInertia(body) * ac.getTorque(body) );
+
+   ac.getForceRef(body) = Vec3();
+   ac.getTorqueRef(body) = Vec3();
+}
+//*************************************************************************************************
+
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/python/mesa_pd/templates/kernel/IntegrateParticlesHCSITS.templ.h b/python/mesa_pd/templates/kernel/IntegrateParticlesHCSITS.templ.h
new file mode 100644
index 0000000000000000000000000000000000000000..068b21e14f59d2c9a341a7f4899656dad054ec34
--- /dev/null
+++ b/python/mesa_pd/templates/kernel/IntegrateParticlesHCSITS.templ.h
@@ -0,0 +1,157 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file IntegrateParticlesHCSITS.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/ContactStorage.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+#include <core/math/Constants.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+ * Kernel performing integration of the particles after the HCSITS iteration is done.
+ * Call this kernel on all particles j to integrate them by a timestep of size dt.
+ * The Speed Limiter limits the number of body radii, a particle can travel in one timestep.
+ * The speed limit factor defines a number of radii that are allowed in a timestep, e.g. a factor of 1 means, that
+ * a particle can only travel one time its radius in each timestep.
+ *
+ * \ingroup mesa_pd_kernel
+ */
+class IntegrateParticlesHCSITS
+{
+   public:
+   // Default constructor sets the default values
+   IntegrateParticlesHCSITS() :
+   {%- for prop in properties %}
+   {{prop.name}}_( {{prop.defValue}} ){{ "," if not loop.last}}
+   {%- endfor %}
+   {}
+
+   // Getter and Setter Functions
+   {%- for prop in properties %}
+   inline const {{prop.type}}& get{{prop.name | capFirst}}() const {return {{prop.name}}_;}
+   inline void set{{prop.name | capFirst}}({{prop.type}} v) { {{prop.name}}_ = v;}
+   {% endfor %}
+
+   template <typename PAccessor>
+   void operator()(size_t j, PAccessor& ac, real_t dt);
+
+   template <typename PAccessor>
+   void integratePositions(PAccessor& ac, size_t body, Vec3 v, Vec3 w, real_t dt ) const;
+
+
+   private:
+   // List properties
+   {% for prop in properties %}
+   {{prop.type}} {{prop.name }}_;
+   {%- endfor %}
+};
+
+
+template <typename PAccessor>
+inline void IntegrateParticlesHCSITS::operator()(size_t j, PAccessor& ac, real_t dt)
+{
+   using namespace data::particle_flags;
+   static_assert(std::is_base_of<data::IAccessor, PAccessor>::value, "please provide a valid accessor");
+
+   auto body_flags = ac.getFlagsRef(j);
+   if (isSet(body_flags, FIXED)){
+      integratePositions(ac, j, ac.getLinearVelocity(j), ac.getAngularVelocity(j), dt );
+   }else{
+      integratePositions(ac, j, ac.getLinearVelocity(j) + ac.getDv(j), ac.getAngularVelocity(j) + ac.getDw(j), dt );
+   }
+}
+
+
+
+
+//*************************************************************************************************
+/*!\brief Time integration of the position and orientation of a given body.
+ *
+ * \param body The body whose position and orientation to time integrate
+ * \param v The linear velocity to use for time integration of the position.
+ * \param w The angular velocity to use for time integration of the orientation.
+ * \param dt The time step size.
+ * \return void
+ *
+ * Performs an Euler time integration of the positions of the given body. Velocities are damped if
+ * indicated by the settings and stored back in the body properties. The bounding box is
+ * recalculated and it is redetermined whether the body is awake or not. Also the data
+ * structure tracking the contacts attached to the body are cleared and
+ */
+template <typename PAccessor>
+inline void IntegrateParticlesHCSITS::integratePositions(PAccessor& ac, size_t body, Vec3 v, Vec3 w, real_t dt ) const {
+   using namespace data::particle_flags;
+   auto body_flags = ac.getFlags(body);
+   if (isSet(body_flags, FIXED))
+      return;
+
+
+   if (getSpeedLimiterActive()) {
+      const auto speed = v.length();
+      const auto edge = ac.getInteractionRadius(body);
+      if (speed * dt > edge * getSpeedLimitFactor()) {
+         v = v * (edge * getSpeedLimitFactor() / dt / speed);
+      }
+
+      const real_t maxPhi = real_t(2) * math::pi * getSpeedLimitFactor();
+      const real_t phi = w.length() * dt;
+      if (phi > maxPhi) {
+         w = w / phi * maxPhi;
+      }
+   }
+
+   // Calculating the translational displacement
+   ac.getPositionRef(body) = (ac.getPosition(body) + v * dt);
+
+   // Calculating the rotation angle
+   const Vec3 phi(w * dt);
+
+   // Calculating the new orientation
+   WALBERLA_ASSERT(!math::isnan(phi), " phi: " << phi << " w: " << w << " dt: " << dt );
+   ac.getRotationRef(body).rotate(phi, phi.length());
+
+   // Storing the velocities back in the body properties
+   ac.getLinearVelocityRef(body) = v;
+   ac.getAngularVelocityRef(body) = w;
+}
+//*************************************************************************************************
+
+
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/src/mesa_pd/collision_detection/AnalyticContactDetection.h b/src/mesa_pd/collision_detection/AnalyticContactDetection.h
index bb3f8f1f21ba9356d0a4385c8de871efe0ec5dc6..284cd1eb9470b155d8ae7bedd034070f09319182 100644
--- a/src/mesa_pd/collision_detection/AnalyticContactDetection.h
+++ b/src/mesa_pd/collision_detection/AnalyticContactDetection.h
@@ -147,11 +147,11 @@ public:
                     Accessor& ac);
 
 protected:
-   size_t idx1_;
-   size_t idx2_;
-   Vec3   contactPoint_;
-   Vec3   contactNormal_;
-   real_t penetrationDepth_;
+   size_t idx1_ = std::numeric_limits<size_t>::max();
+   size_t idx2_ = std::numeric_limits<size_t>::max();
+   Vec3   contactPoint_ = Vec3();
+   Vec3   contactNormal_ = Vec3();
+   real_t penetrationDepth_ = real_t(0);
 
    real_t contactThreshold_ = real_t(0.0);
 };
diff --git a/src/mesa_pd/data/ContactAccessor.h b/src/mesa_pd/data/ContactAccessor.h
index b0f76962491472c0f0efb436d67a2a919ec43cab..470eb0b7c72670d141e58325b2a0106d8c097a64 100644
--- a/src/mesa_pd/data/ContactAccessor.h
+++ b/src/mesa_pd/data/ContactAccessor.h
@@ -26,7 +26,7 @@
 
 #pragma once
 
-#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/IContactAccessor.h>
 #include <mesa_pd/data/ContactStorage.h>
 
 #include <core/UniqueID.h>
@@ -43,7 +43,7 @@ namespace data {
  * Provides get, set and getRef for all members of the ContactStorage.
  * Can be used as a basis class for a more advanced ContactAccessor.
  */
-class ContactAccessor : public IAccessor
+class ContactAccessor : public IContactAccessor
 {
 public:
    ContactAccessor(const std::shared_ptr<data::ContactStorage>& ps) : ps_(ps) {}
diff --git a/src/mesa_pd/data/IContactAccessor.h b/src/mesa_pd/data/IContactAccessor.h
new file mode 100644
index 0000000000000000000000000000000000000000..68382fa9b4d38492055050b148b016f0d7821e77
--- /dev/null
+++ b/src/mesa_pd/data/IContactAccessor.h
@@ -0,0 +1,48 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file IContactAccessor.h
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <core/DataTypes.h>
+
+#include <typeinfo>
+#include <type_traits>
+
+namespace walberla {
+namespace mesa_pd {
+namespace data {
+
+/**
+ * @brief Add this as a base class to identify your class as an accessor for contact objects.
+ *
+ * Contact Accessors passed via templated arguments might be checked like
+ * \code
+ * static_assert(std::is_base_of<IContactAccessor, X>::value, typeid(X).name() + " is not a accessor");
+ * \endcode
+ */
+class IContactAccessor
+{
+public:
+   virtual ~IContactAccessor() = default;
+};
+
+} //namespace data
+} //namespace mesa_pd
+} //namespace walberla
diff --git a/src/mesa_pd/kernel/HCSITSRelaxationStep.h b/src/mesa_pd/kernel/HCSITSRelaxationStep.h
new file mode 100644
index 0000000000000000000000000000000000000000..10d87699963103261cbe32cd411f2af46429739a
--- /dev/null
+++ b/src/mesa_pd/kernel/HCSITSRelaxationStep.h
@@ -0,0 +1,1060 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file HCSITSRelaxationStep.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/IContactAccessor.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+#include <core/math/Constants.h>
+#include <core/math/Limits.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+/**
+ * Apply a HCSITS (Hard-contact semi-implicit timestepping solver) relaxation to all contacts.
+ * Call this kernel on all bodies to perform on relaxation step of the solver.
+ * There exist different friction and relaxation models. See comments on their respective methods
+ * for an extended documentation. Use setRelaxationModel() to set it.
+ * getDeltaMax() can be used to query the maximum change in the impulses applied wrt to the last iteration.
+ *
+ * Use setMaxSubIterations() to set the maximum number of iterations performed to solve optimization problems arising
+ * during the relaxation (in InelasticGeneralizedMaximumDissipationContact and InelasticCoulombContactByDecoupling).
+ *
+ * Use setCor() to set the coefficient of restitution between 0 and 1 for the InelasticProjectedGaussSeidel model.
+ * All other models describe inelastic contacts.
+ *
+ * Example of Using the HCSITS kernels in a simulation:
+ * \code
+ *     // ps: Particle storage
+ *     // cs: Contact storage
+ *     // pa: Particle accessor
+ *     // ca: Contact accessor
+ *
+ *     // Detect contacts first and fill contact storage
+ *
+ *     mesa_pd::mpi::ReduceProperty reductionKernel;
+ *     mesa_pd::mpi::BroadcastProperty broadcastKernel;
+ *
+ *     kernel::IntegrateBodiesHCSITS integration;
+ *
+ *     kernel::InitContactsForHCSITS initContacts(1);
+ *     initContacts.setFriction(0,0,real_t(0.2));
+ *     kernel::InitBodiesForHCSITS initBodies;
+ *
+ *     kernel::HCSITSRelaxationStep relaxationStep;
+ *     relaxationStep.setCor(real_t(0.1)); // Only effective for PGSM
+ *
+ *     // Init Contacts and Bodies (order is arbitrary)
+ *     cs.forEachContact(false, kernel::SelectAll(), ca, initContacts, ca, pa);
+ *     ps.forEachParticle(false, kernel::SelectAll(), pa, initBodies, pa, dt);
+ *
+ *     // Reduce and Broadcast velocities with relaxation parameter 1 before the iteration
+ *     VelocityUpdateNotification::Parameters::relaxationParam = real_t(1.0);
+ *     reductionKernel.operator()<VelocityCorrectionNotification>(*ps);
+ *     broadcastKernel.operator()<VelocityUpdateNotification>(*ps);
+ *
+ *     VelocityUpdateNotification::Parameters::relaxationParam = real_t(0.8);
+ *     for(int j = 0; j < 10; j++){
+ *        cs.forEachContact(false, kernel::SelectAll(), ca, relaxationStep, ca, pa, dt);
+ *        reductionKernel.operator()<VelocityCorrectionNotification>(*ps);
+ *        broadcastKernel.operator()<VelocityUpdateNotification>(*ps);
+ *     }
+ *
+ *     ps.forEachParticle(false, kernel::SelectAll(), pa, integration, pa, dt);
+ * \endcode
+ * \ingroup mesa_pd_kernel
+ *
+ *
+ */
+class HCSITSRelaxationStep
+{
+public:
+   enum RelaxationModel {
+      InelasticFrictionlessContact,
+      ApproximateInelasticCoulombContactByDecoupling,
+      ApproximateInelasticCoulombContactByOrthogonalProjections,
+      InelasticCoulombContactByDecoupling,
+      InelasticCoulombContactByOrthogonalProjections,
+      InelasticGeneralizedMaximumDissipationContact,
+      InelasticProjectedGaussSeidel
+   };
+   
+   // Default constructor sets the default values
+   HCSITSRelaxationStep() :
+   maxSubIterations_( 20 ),
+   relaxationModel_( InelasticFrictionlessContact ),
+   deltaMax_( 0 ),
+   cor_( real_t(0.2) )
+   {}
+
+   // Getter and Setter Functions
+   inline const size_t& getMaxSubIterations() const {return maxSubIterations_;}
+   inline void setMaxSubIterations(size_t v) { maxSubIterations_ = v;}
+   
+   inline const RelaxationModel& getRelaxationModel() const {return relaxationModel_;}
+   inline void setRelaxationModel(RelaxationModel v) { relaxationModel_ = v;}
+   
+   inline const real_t& getDeltaMax() const {return deltaMax_;}
+   inline void setDeltaMax(real_t v) { deltaMax_ = v;}
+   
+   inline const real_t& getCor() const {return cor_;}
+   inline void setCor(real_t v) { cor_ = v;}
+   
+
+   /**
+    * Perform relaxation for a single contact.
+    * \param cid The index of the contact with the accessor ca.
+    * \param ca The contact accessor.
+    * \param pa The particle accessor.
+    * \param dt The timestep size used.
+    */
+   template <typename CAccessor, typename PAccessor>
+   void operator()(size_t cid, CAccessor &ca, PAccessor& pa,  real_t dt);
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticFrictionlessContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa);
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxApproximateInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa);
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa );
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticCoulombContactsByOrthogonalProjections(size_t cid, real_t dtinv, bool approximate, CAccessor &ca, PAccessor& pa );
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticGeneralizedMaximumDissipationContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa );
+
+   template <typename CAccessor, typename PAccessor>
+   real_t relaxInelasticContactsByProjectedGaussSeidel(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa );
+
+private:
+   // List properties
+   
+   size_t maxSubIterations_;
+   RelaxationModel relaxationModel_;
+   real_t deltaMax_;
+   real_t cor_;
+};
+
+
+template <typename CAccessor, typename PAccessor>
+inline void HCSITSRelaxationStep::operator()(size_t cid, CAccessor &ca, PAccessor& pa,  real_t dt)
+{
+   static_assert(std::is_base_of<data::IContactAccessor, CAccessor>::value, "please provide a valid contact accessor");
+   static_assert(std::is_base_of<data::IAccessor, PAccessor>::value, "please provide a valid accessor");
+
+   real_t dtinv(real_t(1.0)/dt);
+   switch( getRelaxationModel() ) {
+      case InelasticFrictionlessContact:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticFrictionlessContacts(cid, dtinv, ca, pa )));
+         break;
+
+      case ApproximateInelasticCoulombContactByDecoupling:
+         setDeltaMax(std::max( getDeltaMax(), relaxApproximateInelasticCoulombContactsByDecoupling( cid, dtinv, ca, pa )));
+         break;
+
+      case ApproximateInelasticCoulombContactByOrthogonalProjections:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticCoulombContactsByOrthogonalProjections( cid, dtinv, true, ca, pa )));
+         break;
+
+      case InelasticCoulombContactByDecoupling:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticCoulombContactsByDecoupling( cid, dtinv, ca, pa )));
+         break;
+
+      case InelasticCoulombContactByOrthogonalProjections:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticCoulombContactsByOrthogonalProjections( cid, dtinv, false, ca, pa )));
+         break;
+
+      case InelasticGeneralizedMaximumDissipationContact:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticGeneralizedMaximumDissipationContacts( cid, dtinv, ca, pa )));
+         break;
+
+      case InelasticProjectedGaussSeidel:
+         setDeltaMax(std::max( getDeltaMax(), relaxInelasticContactsByProjectedGaussSeidel( cid, dtinv, ca, pa )));
+         break;
+
+      default:
+         throw std::runtime_error( "Unsupported relaxation model." );
+}
+
+WALBERLA_LOG_DETAIL("Delta Max: " << getDeltaMax());
+
+}
+
+//*************************************************************************************************
+/*!\brief Relaxes The contact with ID cid once. The contact model is for inelastic unilateral contacts without friction.
+ *
+ * \param cid The index of the contact
+ * \param ca The contact accessor
+ * \param pa The particle accessor
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Separating contacts are preferred over
+ * persisting solutions if valid.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticFrictionlessContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa)
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting.
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      Vec3 p_wf( ca.getNormal(cid) * ( -ca.getDiag_n_inv(cid) * gdot_nto[0] ) );
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with approximate Coulomb friction.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Separating contacts are preferred over
+ * other solutions if valid. Static solutions are preferred over dynamic solutions. Dynamic
+ * solutions are computed by decoupling the normal from the frictional components. That is
+ * for a dynamic contact the normal component is relaxed first followed by the frictional
+ * components. The determination of the frictional components does not perform any subiterations
+ * and guarantees that the friction partially opposes slip.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxApproximateInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting (either static or dynamic).
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      //Vec3 p_cf( -( contactCache.diag_nto_inv_[i] * gdot_nto ) );
+      Vec3 p_cf( -( ca.getDiag_nto_inv(cid) * gdot_nto ) );
+
+      // Can p_cf[0] be negative even though -gdot_nto[0] > 0? Yes! Try:
+      // A = [0.5 -0.1 +0.1; -0.1 0.5 -0.1; +0.1 -0.1 1];
+      // b = [0.01 -1 -1]';
+      // A\b    \approx [-0.19 -2.28 -1.21]'
+      // eig(A) \approx [ 0.40  0.56  1.04]'
+
+      //real_t flimit( contactCache.mu_[i] * p_cf[0] );
+      real_t flimit( ca.getMu(cid) * p_cf[0] );
+      real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+      if( fsq > flimit * flimit || p_cf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // => Complementarity condition on normal reaction now turns into an equation since we know that the normal reaction is definitely not zero.
+
+         // For simplicity we change to a simpler relaxation scheme here:
+         // 1. Relax normal reaction with the tangential components equal to the previous values
+         // 2. Relax tangential components with the newly relaxed normal reaction
+         // Note: The better approach would be to solve the true 3x3 block problem!
+         // Warning: Simply projecting the frictional components is wrong since then the normal action is no longer 0 and simulations break.
+
+         // Add the action of the frictional reactions from the last iteration to the relative contact velocity in normal direction so we can relax it separately.
+         // TODO This can be simplified:
+         //p_cf = trans( contactframe ) * contactCache.p_[i];
+         //p_cf[0] = 0;
+         //p_[i] = contactframe * p_cf;
+
+
+         //Vec3 p_tmp = ( contactCache.t_[i] * contactCache.p_[i] ) * contactCache.t_[i] + ( contactCache.o_[i] * contactCache.p_[i] ) * contactCache.o_[i];
+         Vec3 p_tmp = ( ca.getT(cid) * ca.getP(cid)  ) * ca.getT(cid)  + ( ca.getO(cid) * ca.getP(cid) ) * ca.getO(cid);
+
+
+         //real_t gdot_n = gdot_nto[0] + contactCache.n_[i] * ( ( contactCache.body1_[i]->getInvInertia() * ( contactCache.r1_[i] % p_tmp ) ) % contactCache.r1_[i] + ( contactCache.body2_[i]->getInvInertia() * ( contactCache.r2_[i] % p_tmp ) ) % contactCache.r2_[i] /* + diag_[i] * p */ );
+         real_t gdot_n = gdot_nto[0] + ca.getNormal(cid) * ( ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % p_tmp ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2) * ( ca.getR2(cid) % p_tmp ) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+         //p_cf[0] = -( contactCache.diag_n_inv_[i] * gdot_n );
+         p_cf[0] = -( ca.getDiag_n_inv(cid) * gdot_n );
+
+         // We cannot be sure that gdot_n <= 0 here and thus p_cf[0] >= 0 since we just modified it with the old values of the tangential reactions! => Project
+         p_cf[0] = std::max( real_c( 0 ), p_cf[0] );
+
+         // Now add the action of the normal reaction to the relative contact velocity in the tangential directions so we can relax the frictional components separately.
+         p_tmp = ca.getNormal(cid)  * p_cf[0];
+         Vec3 gdot2 = gdot + ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % p_tmp ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2)* ( ca.getR2(cid) % p_tmp ) ) % ca.getR2(cid);
+         Vec2 gdot_to;
+         gdot_to[0] = ca.getT(cid) * gdot2;
+         gdot_to[1] = ca.getO(cid) * gdot2;
+
+         //Vec2 ret = -( contactCache.diag_to_inv_[i] * gdot_to );
+         Vec2 ret = -( ca.getDiag_to_inv(cid) * gdot_to );
+         p_cf[1] = ret[0];
+         p_cf[2] = ret[1];
+
+         flimit = ca.getMu(cid) * p_cf[0];
+         fsq = p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2];
+         if( fsq > flimit * flimit ) {
+            const real_t f( flimit / std::sqrt( fsq ) );
+            p_cf[1] *= f;
+            p_cf[2] *= f;
+         }
+      }
+      else {
+         // Contact is static.
+      }
+      Vec3 p_wf( contactframe * p_cf );
+
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with Coulomb friction.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Separating contacts are preferred over
+ * other solutions if valid. Static solutions are preferred over dynamic solutions. Dynamic
+ * solutions are computed by decoupling the normal from the frictional components. That is
+ * for a dynamic contact the normal component is relaxed first followed by the frictional
+ * components. How much the frictional components directly oppose slip as required by the Coulomb
+ * friction model depends on the number of subiterations performed. If no subiterations are
+ * performed the friction is guaranteed to be at least partially dissipative.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticCoulombContactsByDecoupling(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+   //WALBERLA_LOG_WARNING( "Contact #" << i << " is\nA = \n" << contactCache.diag_nto_[i] << "\nb = \n" << gdot_nto << "\nmu = " << contactCache.mu_[i] );
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+      //WALBERLA_LOG_WARNING( "Contact #" << i << " is separating." );
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting (either static or dynamic).
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      Vec3 p_cf( -( ca.getDiag_nto_inv(cid) * gdot_nto ) );
+
+      // Can p_cf[0] be negative even though -gdot_nto[0] > 0? Yes! Try:
+      // A = [0.5 -0.1 +0.1; -0.1 0.5 -0.1; +0.1 -0.1 1];
+      // b = [0.01 -1 -1]';
+      // A\b    \approx [-0.19 -2.28 -1.21]'
+      // eig(A) \approx [ 0.40  0.56  1.04]'
+
+      real_t flimit( ca.getMu(cid) * p_cf[0] );
+      real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+      if( fsq > flimit * flimit || p_cf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // => Complementarity condition on normal reaction now turns into an equation since we know that the normal reaction is definitely not zero.
+
+         for (int j = 0; j < 20; ++j) {
+            // For simplicity we change to a simpler relaxation scheme here:
+            // 1. Relax normal reaction with the tangential components equal to the previous values
+            // 2. Relax tangential components with the newly relaxed normal reaction
+            // Note: The better approach would be to solve the true 3x3 block problem!
+            // Warning: Simply projecting the frictional components is wrong since then the normal action is no longer 0 and simulations break.
+
+            Vec3 gdotCorrected;
+            real_t gdotCorrected_n;
+            Vec2 gdotCorrected_to;
+
+            // Calculate the relative contact velocity in the global world frame (if no normal contact reaction is present at contact i)
+            p_cf[0] = 0;
+            //                       |<- p_cf is orthogonal to the normal and drops out in next line ->|
+            gdotCorrected   = /* ( contactCache.body1_[i]->getInvMass() + contactCache.body2_[i]->getInvMass() ) * p_cf  */ gdot + ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % ( ca.getT(cid) * p_cf[1] + ca.getO(cid) * p_cf[2] ) ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2) * ( ca.getR2(cid) % ( ca.getT(cid) * p_cf[1] + ca.getO(cid) * p_cf[2] ) ) ) % ca.getR2(cid);
+            gdotCorrected_n = ca.getNormal(cid) * gdotCorrected + ca.getDistance(cid) * dtinv;
+
+            // Relax normal component.
+            p_cf[0] = std::max( real_c( 0 ), -( ca.getDiag_n_inv(cid) * gdotCorrected_n ) );
+
+            // Calculate the relative contact velocity in the global world frame (if no frictional contact reaction is present at contact i)
+            p_cf[1] = p_cf[2] = real_c( 0 );
+            //                       |<- p_cf is orthogonal to the tangential plane and drops out   ->|
+            gdotCorrected   = /* ( contactCache.body1_[i]->getInvMass() + contactCache.body2_[i]->getInvMass() ) * p_cf */ gdot + ( pa.getInvInertia(bId1) * ( ca.getR1(cid) % ( ca.getNormal(cid) * p_cf[0] ) ) ) % ca.getR1(cid) + ( pa.getInvInertia(bId2) * ( ca.getR2(cid) % ( ca.getNormal(cid) * p_cf[0] ) ) ) % ca.getR2(cid);
+            gdotCorrected_to[0] = ca.getT(cid) * gdotCorrected;
+            gdotCorrected_to[1] = ca.getO(cid) * gdotCorrected;
+
+            // Relax frictional components.
+            Vec2 ret = -( ca.getDiag_to_inv(cid) * gdotCorrected_to );
+            p_cf[1] = ret[0];
+            p_cf[2] = ret[1];
+
+            flimit = ca.getMu(cid) * p_cf[0];
+            fsq = p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2];
+            if( fsq > flimit * flimit ) {
+               // 3.2.1 Decoupling
+               // \tilde{x}^0 = p_cf[1..2]
+
+               // Determine \tilde{A}
+               Mat2 diag_to( ca.getDiag_nto(cid)(1, 1), ca.getDiag_nto(cid)(1, 2), ca.getDiag_nto(cid)(2, 1), ca.getDiag_nto(cid)(2, 2) );
+
+               const real_t f( flimit / std::sqrt( fsq ) );
+               //p_cf[1] *= f;
+               //p_cf[2] *= f;
+
+               // Determine search interval for Golden Section Search
+               const real_t phi( real_c(0.5) * ( real_c(1) + std::sqrt( real_c( 5 ) ) ) );
+               real_t shift( std::atan2( -p_cf[2], p_cf[1] ) );
+               real_t acos_f( std::acos( f ) );
+
+               //WALBERLA_LOG_WARNING( acos_f << " " << shift );
+
+               real_t alpha_left( -acos_f - shift );
+               //Vec2 x_left( flimit * std::cos( alpha_left ), flimit * std::sin( alpha_left ) );
+               //real_t f_left( 0.5 * trans( x_left ) * ( diag_to * x_left ) - trans( x_left ) * ( -gdot_to ) );
+
+               real_t alpha_right( acos_f - shift );
+               //Vec2 x_right( flimit * std::cos( alpha_right ), flimit * std::sin( alpha_right ) );
+               //real_t f_right( 0.5 * trans( x_right ) * ( diag_to * x_right ) - trans( x_right ) * ( -gdot_to ) );
+
+               real_t alpha_mid( ( alpha_right + alpha_left * phi ) / ( 1 + phi ) );
+               Vec2 x_mid( flimit * std::cos( alpha_mid ), flimit * std::sin( alpha_mid ) );
+               real_t f_mid( real_c(0.5) * x_mid * ( diag_to * x_mid ) - x_mid * ( -gdotCorrected_to ) );
+
+               bool leftlarger = false;
+               for( size_t k = 0; k < getMaxSubIterations(); ++k ) {
+                  real_t alpha_next( alpha_left + ( alpha_right - alpha_mid ) );
+                  Vec2 x_next( flimit * std::cos( alpha_next ), flimit * std::sin( alpha_next ) );
+                  real_t f_next( real_c(0.5) * x_next * ( diag_to * x_next ) - x_next * ( -gdotCorrected_to ) );
+                  //WALBERLA_LOG_WARNING( "[(" << alpha_left << ", ?); (" << alpha_mid << ", " << f_mid << "); (" << alpha_right << ", ?)] <- (" << alpha_next << ", " << f_next << ")" );
+                  //WALBERLA_LOG_WARNING( "left: " << alpha_mid - alpha_left << "  right: " << alpha_right - alpha_mid << "  ll: " << leftlarger );
+                  //WALBERLA_ASSERT(leftlarger ? (alpha_mid - alpha_left > alpha_right - alpha_mid) : (alpha_mid - alpha_left < alpha_right - alpha_mid), "ll inconsistent!" );
+
+                  if (leftlarger) {
+                     // left interval larger
+                     if( f_next < f_mid ) {
+                        alpha_right = alpha_mid;
+                        alpha_mid   = alpha_next;
+                        x_mid       = x_next;
+                        f_mid       = f_next;
+                        leftlarger = true;
+                     }
+                     else {
+                        alpha_left  = alpha_next;
+                        leftlarger = false;
+                     }
+                  }
+                  else {
+                     // right interval larger
+                     if( f_next < f_mid ) {
+                        alpha_left = alpha_mid;
+                        alpha_mid  = alpha_next;
+                        x_mid      = x_next;
+                        f_mid      = f_next;
+                        leftlarger = false;
+                     }
+                     else {
+                        alpha_right = alpha_next;
+                        leftlarger = true;
+                     }
+                  }
+               }
+               //WALBERLA_LOG_WARNING( "dalpha = " << alpha_right - alpha_left );
+
+               p_cf[1] = x_mid[0];
+               p_cf[2] = x_mid[1];
+            }
+         }
+         //WALBERLA_LOG_WARNING( "Contact #" << i << " is dynamic." );
+      }
+      else {
+         // Contact is static.
+         //WALBERLA_LOG_WARNING( "Contact #" << i << " is static." );
+      }
+
+      //WALBERLA_LOG_WARNING( "Contact reaction in contact frame: " << p_cf << "\n" << ca.getDiag_nto(cid)*p_cf + gdot_nto );
+      Vec3 p_wf( contactframe * p_cf );
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with Coulomb friction.
+ *
+ * \param dtinv The inverse of the current time step.
+ * \param approximate Use the approximate model showing bouncing.
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). The iterative method to solve the contact
+ * problem is e.g. described in the article "A matrix-free cone complementarity approach for
+ * solving large-scale, nonsmooth, rigid body dynamics" by A. Tasora and M. Anitescu in Computer
+ * Methods in Applied Mechanics and Engineering (Volume 200, Issues 5–8, 15 January 2011,
+ * Pages 439-453). The contact model is purely quadratic and convergence should be good but depends
+ * on a parameter. The one-contact problem has a unique solution. The frictional reactions
+ * for a dynamic contact converge to those that directly oppose slip. However, the contact is
+ * not perfectly inelastic for dynamic contacts but bounces. These vertical motions tend to
+ * go to zero for smaller time steps and can be interpreted as exaggerated vertical motions
+ * coming from micro asperities (see "Optimization-based simulation of nonsmooth rigid multibody
+ * dynamics" by M. Anitescu in Mathematical Programming (Volume 105, Issue 1, January 2006, Pages
+ * 113-143). These motions can be prevented by a small change in the iteration proposed in "The
+ * bipotential method: a constructive approach to design the complete contact law with friction and
+ * improved numerical algorithms" by G. De Saxce and Z-Q. Feng in Mathematical and Computer
+ * Modelling (Volume 28, Issue 4, 1998, Pages 225-245). Which iteration is used is controlled with
+ * the approximate parameter.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticCoulombContactsByOrthogonalProjections(size_t cid, real_t dtinv, bool approximate, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+
+   const real_t w( 1 ); // w > 0
+   Vec3 p_cf( contactframe.getTranspose() * ca.getP(cid) );
+   if( approximate ) {
+      // Calculate next iterate (Anitescu/Tasora).
+      p_cf = p_cf - w * ( ca.getDiag_nto(cid) * p_cf + gdot_nto );
+   }
+   else {
+      // Calculate next iterate (De Saxce/Feng).
+      Vec3 tmp( ca.getDiag_nto(cid) * p_cf + gdot_nto );
+      tmp[0] += std::sqrt( math::sq( tmp[1] ) + math::sq( tmp[2] ) ) * ca.getMu(cid);
+      p_cf = p_cf - w * tmp;
+   }
+
+   // Project.
+   real_t flimit( ca.getMu(cid) * p_cf[0] );
+   real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+   if( p_cf[0] > 0 && fsq < flimit * flimit ) {
+      // Unconstrained minimum is in cone leading to a static contact and no projection
+      // is necessary.
+   }
+   else if( p_cf[0] < 0 && fsq < math::sq( p_cf[0] )/ math::sq( ca.getMu(cid) ) ) {
+      // Unconstrained minimum is in dual cone leading to a separating contact where no contact
+      // reaction is present (the unconstrained minimum is projected to the tip of the cone).
+      p_cf = Vec3();
+   }
+   else {
+      // The contact is dynamic.
+      real_t f( std::sqrt( fsq ) );
+      p_cf[0] = ( f * ca.getMu(cid) + p_cf[0] ) / ( math::sq( ca.getMu(cid) ) + 1 );
+      real_t factor( ca.getMu(cid) * p_cf[0] / f );
+      p_cf[1] *= factor;
+      p_cf[2] *= factor;
+   }
+
+   Vec3 p_wf( contactframe * p_cf );
+   Vec3 dp( ca.getP(cid) - p_wf );
+   delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+
+   ca.getPRef(cid) = p_wf;
+
+   // Apply impulse right away.
+   pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+//*************************************************************************************************
+
+/*!\brief Relaxes all contacts once. The contact model is for inelastic unilateral contacts with the generalized maximum dissipation principle for friction.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ * This function is to be called from resolveContacts(). Dynamic solutions are computed by
+ * minimizing the kinetic energy along the intersection of the plane of maximum compression and
+ * the friction cone.
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticGeneralizedMaximumDissipationContacts(size_t cid, real_t dtinv, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+
+   // Remove velocity corrections of this contact's reaction.
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+   pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+   pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+   pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) -
+                  ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) +
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) -
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) /* + diag_[i] * p */ );
+
+   // Change from the global world frame to the contact frame
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+   Vec3 gdot_nto( contactframe.getTranspose() * gdot );
+
+   // The constraint in normal direction is actually a positional constraint but instead of g_n we use g_n/dt equivalently and call it gdot_n
+   gdot_nto[0] += ( /* + trans( contactCache.n_[i] ) * ( contactCache.body1_[i]->getPosition() + contactCache.r1_[i] ) - ( contactCache.body2_[i]->getPosition() + contactCache.r2_[i] ) */ + ca.getDistance(cid) ) * dtinv;
+
+
+   if( gdot_nto[0] >= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+
+      // No need to apply zero impulse.
+   }
+   else {
+      // Contact is persisting (either static or dynamic).
+
+      // Calculate the impulse necessary for a static contact expressed as components in the contact frame.
+      Vec3 p_cf( -( ca.getDiag_nto_inv(cid) * gdot_nto ) );
+
+      // Can p_cf[0] be negative even though -gdot_nto[0] > 0? Yes! Try:
+      // A = [0.5 -0.1 +0.1; -0.1 0.5 -0.1; +0.1 -0.1 1];
+      // b = [0.01 -1 -1]';
+      // A\b    \approx [-0.19 -2.28 -1.21]'
+      // eig(A) \approx [ 0.40  0.56  1.04]'
+
+      real_t flimit( ca.getMu(cid) * p_cf[0] );
+      real_t fsq( p_cf[1] * p_cf[1] + p_cf[2] * p_cf[2] );
+      if( fsq > flimit * flimit || p_cf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // => Complementarity condition on normal reaction now turns into an equation since we know that the normal reaction is definitely not zero.
+
+         // \breve{x}^0 = p_cf[1..2]
+
+         // Eliminate normal component from 3x3 system: ca.getDiag_nto(cid)*p_cf + gdot_nto => \breve{A} \breve{x} - \breve{b}
+         const real_t invA_nn( math::inv( ca.getDiag_nto(cid)(0, 0) ) );
+         const real_t offdiag( ca.getDiag_nto(cid)(1, 2) - invA_nn * ca.getDiag_nto(cid)(0, 1) * ca.getDiag_nto(cid)(0, 2) );
+         Mat2 A_breve( ca.getDiag_nto(cid)(1, 1) - invA_nn *math::sq( ca.getDiag_nto(cid)(0, 1) ), offdiag, offdiag, ca.getDiag_nto(cid)(2, 2) - invA_nn *math::sq( ca.getDiag_nto(cid)(0, 2) ) );
+         Vec2 b_breve( -gdot_nto[1] + invA_nn * ca.getDiag_nto(cid)(0, 1) * gdot_nto[0], -gdot_nto[2] + invA_nn * ca.getDiag_nto(cid)(0, 2) * gdot_nto[0] );
+
+         const real_t shiftI( std::atan2( -ca.getDiag_nto(cid)(0, 2), ca.getDiag_nto(cid)(0, 1) ) );
+         const real_t shiftJ( std::atan2( -p_cf[2], p_cf[1] ) );
+         const real_t a3( std::sqrt(math::sq( ca.getDiag_nto(cid)(0, 1) ) +math::sq( ca.getDiag_nto(cid)(0, 2) ) ) );
+         const real_t fractionI( -ca.getDiag_nto(cid)(0, 0) / ( ca.getMu(cid) * a3 ) );
+         const real_t fractionJ( std::min( invA_nn * ca.getMu(cid) * ( ( -gdot_nto[0] ) / std::sqrt( fsq ) - a3 * std::cos( shiftI - shiftJ ) ), real_c( 1 ) ) );
+
+         // Search interval determination.
+         real_t alpha_left, alpha_right;
+         if( fractionJ < -1 ) {
+            // J is complete
+            const real_t angleI( std::acos( fractionI ) );
+            alpha_left = -angleI - shiftI;
+            alpha_right = +angleI - shiftI;
+            if( alpha_left < 0 ) {
+               alpha_left += 2 * math::pi;
+               alpha_right += 2 * math::pi;
+            }
+         }
+         else if( ca.getDiag_nto(cid)(0, 0) > ca.getMu(cid) * a3 ) {
+            // I is complete
+            const real_t angleJ( std::acos( fractionJ ) );
+            alpha_left = -angleJ - shiftJ;
+            alpha_right = +angleJ - shiftJ;
+            if( alpha_left < 0 ) {
+               alpha_left += 2 * math::pi;
+               alpha_right += 2 * math::pi;
+            }
+         }
+         else {
+            // neither I nor J is complete
+            const real_t angleJ( std::acos( fractionJ ) );
+            real_t alpha1_left( -angleJ - shiftJ );
+            real_t alpha1_right( +angleJ - shiftJ );
+            if( alpha1_left < 0 ) {
+               alpha1_left += 2 * math::pi;
+               alpha1_right += 2 * math::pi;
+            }
+            const real_t angleI( std::acos( fractionI ) );
+            real_t alpha2_left( -angleI - shiftI );
+            real_t alpha2_right( +angleI - shiftI );
+            if( alpha2_left < 0 ) {
+               alpha2_left += 2 * math::pi;
+               alpha2_right += 2 * math::pi;
+            }
+
+            // Swap intervals if second interval does not start right of the first interval.
+            if( alpha1_left > alpha2_left ) {
+               std::swap( alpha1_left, alpha2_left );
+               std::swap( alpha1_right, alpha2_right );
+            }
+
+            if( alpha2_left > alpha1_right ) {
+               alpha2_right -= 2*math::pi;
+               if( alpha2_right > alpha1_right ) {
+                  // [alpha1_left; alpha1_right] \subset [alpha2_left; alpha2_right]
+               }
+               else {
+                  // [alpha2_left; alpha2_right] intersects the left end of [alpha1_left; alpha1_right]
+                  alpha1_right = alpha2_right;
+               }
+            }
+            else {
+               alpha1_left = alpha2_left;
+               if( alpha2_right > alpha1_right ) {
+                  // [alpha2_left; alpha2_right] intersects the right end of [alpha1_left; alpha1_right]
+               }
+               else {
+                  // [alpha2_left; alpha2_right] \subset [alpha1_left; alpha1_right]
+                  alpha1_right = alpha2_right;
+               }
+            }
+
+            alpha_left = alpha1_left;
+            alpha_right = alpha1_right;
+         }
+
+         const real_t phi( real_c(0.5) * ( real_c(1) + std::sqrt( real_c( 5 ) ) ) );
+         real_t alpha_mid( ( alpha_right + alpha_left * phi ) / ( 1 + phi ) );
+         Vec2 x_mid;
+         real_t f_mid;
+
+         {
+            real_t r_ub = ca.getMu(cid) * ( -gdot_nto[0] ) / ( ca.getDiag_nto(cid)(0, 0) + ca.getMu(cid) * a3 * std::cos( alpha_mid + shiftI ) );
+            if( r_ub < 0 )
+               r_ub = math::Limits<real_t>::inf();
+            x_mid = Vec2( r_ub * std::cos( alpha_mid ), r_ub * std::sin( alpha_mid ) );
+            f_mid = real_c(0.5) * x_mid * ( A_breve * x_mid ) - x_mid * b_breve;
+         }
+
+         bool leftlarger = false;
+         for( size_t k = 0; k < maxSubIterations_; ++k ) {
+            real_t alpha_next( alpha_left + ( alpha_right - alpha_mid ) );
+            real_t r_ub = ca.getMu(cid) * ( -gdot_nto[0] ) / ( ca.getDiag_nto(cid)(0, 0) + ca.getMu(cid) * a3 * std::cos( alpha_next + shiftI ) );
+            if( r_ub < 0 )
+               r_ub = math::Limits<real_t>::inf();
+            Vec2 x_next( r_ub * std::cos( alpha_next ), r_ub * std::sin( alpha_next ) );
+            real_t f_next( real_c(0.5) * x_next * ( A_breve * x_next ) - x_next * b_breve );
+
+            //WALBERLA_LOG_WARNING( "[(" << alpha_left << ", ?); (" << alpha_mid << ", " << f_mid << "); (" << alpha_right << ", ?)] <- (" << alpha_next << ", " << f_next << ")" );
+            //WALBERLA_LOG_WARNING( "left: " << alpha_mid - alpha_left << "  right: " << alpha_right - alpha_mid << "  ll: " << leftlarger );
+            //WALBERLA_ASSERT(leftlarger ? (alpha_mid - alpha_left > alpha_right - alpha_mid) : (alpha_mid - alpha_left < alpha_right - alpha_mid), "ll inconsistent!" );
+
+            if (leftlarger) {
+               // left interval larger
+               if( f_next < f_mid ) {
+                  alpha_right = alpha_mid;
+                  alpha_mid   = alpha_next;
+                  x_mid       = x_next;
+                  f_mid       = f_next;
+                  leftlarger = true;
+               }
+               else {
+                  alpha_left  = alpha_next;
+                  leftlarger = false;
+               }
+            }
+            else {
+               // right interval larger
+               if( f_next < f_mid ) {
+                  alpha_left = alpha_mid;
+                  alpha_mid  = alpha_next;
+                  x_mid      = x_next;
+                  f_mid      = f_next;
+                  leftlarger = false;
+               }
+               else {
+                  alpha_right = alpha_next;
+                  leftlarger = true;
+               }
+            }
+         }
+         //WALBERLA_LOG_DETAIL( "dalpha = " << alpha_right - alpha_left << "\n");
+         {
+            real_t alpha_init( std::atan2( p_cf[2], p_cf[1] ) );
+            real_t r_ub = ca.getMu(cid) * ( -gdot_nto[0] ) / ( ca.getDiag_nto(cid)(0, 0) + ca.getMu(cid) * a3 * std::cos( alpha_init + shiftI ) );
+            if( r_ub < 0 )
+               r_ub = math::Limits<real_t>::inf();
+            Vec2 x_init( r_ub * std::cos( alpha_init ), r_ub * std::sin( alpha_init ) );
+            real_t f_init( real_c(0.5) * x_init * ( A_breve * x_init ) - x_init * b_breve );
+
+            if( f_init < f_mid )
+            {
+               x_mid = x_init;
+               WALBERLA_LOG_DETAIL( "Replacing solution by primitive dissipative solution (" << f_init << " < " << f_mid << " at " << alpha_init << " vs. " << alpha_mid << ").\n");
+            }
+         }
+
+         p_cf[0] = invA_nn * ( -gdot_nto[0] - ca.getDiag_nto(cid)(0, 1) * x_mid[0] - ca.getDiag_nto(cid)(0, 2) * x_mid[1] );
+         p_cf[1] = x_mid[0];
+         p_cf[2] = x_mid[1];
+         //WALBERLA_LOG_DETAIL( "Contact #" << i << " is dynamic." );
+      }
+      else {
+         // Contact is static.
+         //WALBERLA_LOG_DETAIL( "Contact #" << i << " is static." );
+      }
+      Vec3 p_wf( contactframe * p_cf );
+      Vec3 dp( ca.getP(cid) - p_wf );
+      delta_max = std::max( delta_max, std::max( std::abs( dp[0] ), std::max( std::abs( dp[1] ), std::abs( dp[2] ) ) ) );
+      //WALBERLA_LOG_DETAIL( "Contact reaction in contact frame: " << p_cf << "\nContact action in contact frame: " << ca.getDiag_nto(cid)*p_cf + gdot_nto );
+
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) += pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) += pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) -= pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) -= pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+   return delta_max;
+}
+//*************************************************************************************************
+
+//*************************************************************************************************
+//*************************************************************************************************
+/*!\brief Relaxes all contacts once. The contact model is from the paper by Tschisgale et al.
+ * "A constraint-based collision model for Cosserat rods" and works with a coefficient of
+ * restitution cor with 0 < cor <= 1.
+ *
+ * \return The largest variation of contact impulses in the L-infinity norm.
+ *
+ */
+template <typename CAccessor, typename PAccessor>
+inline real_t HCSITSRelaxationStep::relaxInelasticContactsByProjectedGaussSeidel(size_t cid, real_t /*dtinv*/, CAccessor &ca, PAccessor& pa )
+{
+   real_t delta_max( 0 );
+   size_t bId1 = ca.getId1(cid);
+   size_t bId2 = ca.getId2(cid);
+   // Calculate the relative contact VELOCITY in the global world frame (if no contact reaction is present at contact i)
+   // This is the negative contact velocity as for the other methods
+   Vec3 gdot    ( ( pa.getLinearVelocity(bId2)     + pa.getDv(bId2) ) -
+                  ( pa.getLinearVelocity(bId1)     + pa.getDv(bId1) ) +
+                  ( pa.getAngularVelocity(bId2)    + pa.getDw(bId2) ) % ca.getR2(cid) -
+                  ( pa.getAngularVelocity(bId1)    + pa.getDw(bId1) ) % ca.getR1(cid) /* + diag_[i] * p */ );
+
+   // Use restitution hypothesis
+   gdot = gdot + getCor()*(gdot*ca.getNormal(cid))*ca.getNormal(cid);
+
+   Mat3 contactframe( ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+
+   //
+   // Turn system matrix back to the world frame.
+   Mat3 K = contactframe * ca.getDiag_nto(cid) *  contactframe.getTranspose();
+   Mat3 Kinv = contactframe * ca.getDiag_nto_inv(cid) * contactframe.getTranspose();
+
+   // Compute impulse necessary in the world frame
+   Vec3 p_wf( - Kinv * gdot );
+
+
+
+   if( gdot * ca.getNormal(cid) <= 0 ) {
+      // Contact is separating if no contact reaction is present at contact i.
+      delta_max = std::max( delta_max, std::max( std::abs( ca.getP(cid)[0] ), std::max( std::abs( ca.getP(cid)[1] ), std::abs( ca.getP(cid)[2] ) ) ) );
+      ca.getPRef(cid) = Vec3();
+      // No need to apply zero impulse.
+   }
+   else {
+      // Dissect the impuls in a tangetial and normal directions
+      // Use the inverted contact frame with -n as in the publication
+      Mat3 reversedContactFrame( -ca.getNormal(cid), ca.getT(cid), ca.getO(cid) );
+      Vec3 p_rcf( reversedContactFrame.getTranspose() * p_wf );
+
+      // Check the frictional limit
+      real_t flimit( ca.getMu(cid) * p_rcf[0] );
+
+      // Square of tangential components of the impulse
+      real_t fsq( p_rcf[1] * p_rcf[1] + p_rcf[2] * p_rcf[2] );
+
+      if( fsq > flimit * flimit || p_rcf[0] < 0 ) {
+         // Contact cannot be static so it must be dynamic.
+         // Calculate corrected contact reaction by the method of Tschigale et al.
+         // Normal component is close to 0 or negative, and we cannot asses a tangential direction
+         // Treat this case with the 0 impulse
+         if (fsq < real_t(1e-8)) {
+            delta_max = std::max(delta_max, std::max(std::abs(ca.getP(cid)[0]),
+                                                     std::max(std::abs(ca.getP(cid)[1]), std::abs(ca.getP(cid)[2]))));
+            ca.getPRef(cid) = Vec3();
+         } else {
+            // tangential direction of sliding
+            Vec3 tan_dir = ((p_rcf[1] * ca.getT(cid)) + (p_rcf[2] * ca.getO(cid))).getNormalized();
+            // scalar magnitude of pv.
+            real_t abspv = ((K * p_wf) * ca.getNormal(cid)) /
+                           ((K * (-ca.getNormal(cid) + ca.getMu(cid) * tan_dir)) * ca.getNormal(cid));
+            p_wf = abspv * (-ca.getNormal(cid) + ca.getMu(cid) * tan_dir);
+
+         }
+      }
+      // Calculate variation
+      Vec3 dp(ca.getP(cid) - p_wf);
+      delta_max = std::max(delta_max, std::max(std::abs(dp[0]), std::max(std::abs(dp[1]), std::abs(dp[2]))));
+      ca.getPRef(cid) = p_wf;
+
+      // Apply impulse right away.
+      pa.getDvRef(bId1) -= pa.getInvMass(bId1) * ca.getP(cid);
+      pa.getDwRef(bId1) -= pa.getInvInertia(bId1) * (ca.getR1(cid) % ca.getP(cid));
+      pa.getDvRef(bId2) += pa.getInvMass(bId2) * ca.getP(cid);
+      pa.getDwRef(bId2) += pa.getInvInertia(bId2) * (ca.getR2(cid) % ca.getP(cid));
+   }
+   return delta_max;
+}
+//*************************************************************************************************
+
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
\ No newline at end of file
diff --git a/src/mesa_pd/kernel/InitContactsForHCSITS.h b/src/mesa_pd/kernel/InitContactsForHCSITS.h
new file mode 100644
index 0000000000000000000000000000000000000000..e2d14795204d7372f0b61032f3437653acbbd154
--- /dev/null
+++ b/src/mesa_pd/kernel/InitContactsForHCSITS.h
@@ -0,0 +1,175 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitContactsForHCSITS.h
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/IContactAccessor.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+* Init the datastructures for a contact for later use of the HCSITS-Solver.
+* Call this kernel on all contacts that you want to treat with HCSITS before performing any relaxation timesteps.
+* Use setErp() to set the error reduction parameter, which defines the share of the overlap that is resolved in one step
+* (0 meaning after the relaxation the overlap is the same, 1 meaning the particles will have no overlap after relaxation).
+* Use setFriction(a,b, cof) to define the coefficient of friction cof between materials a and b. It is assumed to be
+ * symmetric w.r.t. the materials.
+* \ingroup mesa_pd_kernel
+*/
+class InitContactsForHCSITS{
+public:
+
+   // Default constructor sets the default values
+   explicit InitContactsForHCSITS(const uint_t numParticleTypes) :
+   numParticleTypes_ (numParticleTypes),
+   erp_( real_t(0.8) ),
+   maximumPenetration_( 0 )
+   {
+
+      
+      friction_.resize(numParticleTypes * numParticleTypes, real_t(0));
+   }
+
+   // Getter and Setter Functions
+   inline const real_t& getErp() const {return erp_;}
+   inline void setErp(real_t v) { erp_ = v;}
+   
+   inline const real_t& getMaximumPenetration() const {return maximumPenetration_;}
+   inline void setMaximumPenetration(real_t v) { maximumPenetration_ = v;}
+   
+
+   // Getter and Setter Functions for material parameter combinations (they are assumed symmetric).
+   
+   inline void setFriction(const size_t type1, const size_t type2, const real_t& val)
+   {
+      WALBERLA_ASSERT_LESS( type1, numParticleTypes_ );
+      WALBERLA_ASSERT_LESS( type2, numParticleTypes_ );
+      friction_[numParticleTypes_*type1 + type2] = val;
+      friction_[numParticleTypes_*type2 + type1] = val;
+   }
+
+   
+   inline real_t getFriction(const size_t type1, const size_t type2) const
+   {
+      WALBERLA_ASSERT_LESS( type1, numParticleTypes_ );
+      WALBERLA_ASSERT_LESS( type2, numParticleTypes_ );
+      WALBERLA_ASSERT_FLOAT_EQUAL( friction_[numParticleTypes_*type1 + type2],
+              friction_[numParticleTypes_*type2 + type1],
+              "parameter matrix for friction not symmetric!");
+      return friction_[numParticleTypes_*type1 + type2];
+   }
+
+   // List material parameters
+   
+   std::vector<real_t> friction_ {};
+
+   template<typename CAccessor, typename PAccessor>
+   void operator()(size_t c, CAccessor &ca, PAccessor &pa);
+
+private:
+   // List properties
+   const uint_t numParticleTypes_;
+
+   
+   real_t erp_;
+   real_t maximumPenetration_;
+
+   
+
+};
+
+
+template<typename CAccessor, typename PAccessor>
+inline void InitContactsForHCSITS::operator()(size_t c, CAccessor &ca, PAccessor &pa) {
+
+      static_assert(std::is_base_of<data::IContactAccessor, CAccessor>::value, "please provide a valid contact accessor");
+      static_assert(std::is_base_of<data::IAccessor, PAccessor>::value, "please provide a valid particle accessor");
+
+   size_t bId1 = ca.getId1(c);
+   size_t bId2 = ca.getId2(c);
+   ca.setR1(c, ca.getPosition(c) - pa.getPosition(bId1));
+   ca.setR2(c, ca.getPosition(c) - pa.getPosition(bId2));
+
+
+   // construct vector perpendicular to the normal (cross product with cardinal basis vector where the 1 component is where the other vector has its component of smallest magnitude)
+   if (std::fabs(ca.getNormalRef(c)[0]) < std::fabs(ca.getNormalRef(c)[1])) {
+      if (std::fabs(ca.getNormalRef(c)[0]) < std::fabs(ca.getNormalRef(c)[2]))
+         ca.setT(c, Vec3(0, ca.getNormalRef(c)[2], -ca.getNormalRef(c)[1]));   // = n x (1, 0, 0)^T
+      else
+         ca.setT(c, Vec3(ca.getNormalRef(c)[1], -ca.getNormalRef(c)[0], 0));   // = n x (0, 0, 1)^T
+   } else {
+      if (std::fabs(ca.getNormalRef(c)[1]) < std::fabs(ca.getNormalRef(c)[2]))
+         ca.setT(c, Vec3(-ca.getNormalRef(c)[2], 0, ca.getNormalRef(c)[0]));   // = n x (0, 1, 0)^T
+      else
+         ca.setT(c, Vec3(ca.getNormalRef(c)[1], -ca.getNormalRef(c)[0], 0));   // = n x (0, 0, 1)^T
+   }
+   normalize(ca.getTRef(c));
+   ca.setO(c, ca.getNormal(c) % ca.getT(c));
+
+   Mat3 contactframe(ca.getNormal(c), ca.getT(c), ca.getO(c));
+
+   // If the distance is negative then penetration is present. This is an error and should be corrected.
+   // Correcting the whole error is not recommended since due to the linearization the errors cannot
+   // completely fixed anyway and the error reduction will introduce artificial restitution.
+   // However, if the distance is positive then it is not about error correction but the distance that
+   // can still be overcome without penetration and thus no error correction parameter should be applied.
+   if (ca.getDistance(c) < real_t(0.0)) {
+      setMaximumPenetration(std::max(getMaximumPenetration(), -ca.getDistance(c)));
+      ca.getDistanceRef(c) *= erp_;
+   }
+
+   ca.getMuRef(c) = getFriction(pa.getType(bId1), pa.getType(bId2));
+
+   Mat3 diag = -(
+           math::skewSymCrossProduct(ca.getR1(c),
+                                     math::skewSymCrossProduct(pa.getInvInertia(bId1), ca.getR1(c)))
+           + math::skewSymCrossProduct(ca.getR2(c),
+                                       math::skewSymCrossProduct(pa.getInvInertia(bId2), ca.getR2(c))));
+   diag[0] += pa.getInvMass(bId1) + pa.getInvMass(bId2);
+   diag[4] += pa.getInvMass(bId1) + pa.getInvMass(bId2);
+   diag[8] += pa.getInvMass(bId1) + pa.getInvMass(bId2);
+
+   diag = contactframe.getTranspose() * diag * contactframe;
+
+   // Diagonal block is known to be positive-definite and thus an inverse always exists.
+   ca.getDiag_ntoRef(c) = diag;
+   ca.getDiag_nto_invRef(c) = diag.getInverse();
+   ca.getDiag_n_invRef(c) = math::inv(diag[0]);
+   ca.getDiag_to_invRef(c) = Mat2(diag[4], diag[5], diag[7], diag[8]).getInverse();
+}
+
+}
+}
+}
\ No newline at end of file
diff --git a/src/mesa_pd/kernel/InitParticlesForHCSITS.h b/src/mesa_pd/kernel/InitParticlesForHCSITS.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a72e4fadaee5c914b88c654ae40cf41a6955d0d
--- /dev/null
+++ b/src/mesa_pd/kernel/InitParticlesForHCSITS.h
@@ -0,0 +1,119 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitParticlesForHCSITS.h
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+/**
+ * Init the datastructures for the particles for later use of the HCSITS-Solver.
+ * Call this kernel on all particles that will be treated with HCSITS before performing any relaxation timesteps.
+ * Use setGlobalAcceleration() to set an accelleration action uniformly across all particles (e.g. gravity)
+ * \ingroup mesa_pd_kernel
+ */
+class InitParticlesForHCSITS
+{
+public:
+   // Default constructor sets the default values
+   InitParticlesForHCSITS() :
+   globalAcceleration_( 0 )
+   {}
+
+   // Getter and Setter Functions for properties
+   inline const walberla::mesa_pd::Vec3& getGlobalAcceleration() const {return globalAcceleration_;}
+   inline void setGlobalAcceleration(walberla::mesa_pd::Vec3 v) { globalAcceleration_ = v;}
+   
+
+   template <typename Accessor>
+   void operator()(size_t j, Accessor& ac, real_t dt);
+
+   template <typename Accessor>
+   void initializeVelocityCorrections(Accessor& ac, size_t body, Vec3& dv, Vec3& dw, real_t dt ) const;
+
+private:
+   // List properties
+   
+   walberla::mesa_pd::Vec3 globalAcceleration_;
+
+};
+
+
+template <typename Accessor>
+inline void InitParticlesForHCSITS::operator()(size_t j, Accessor& ac, real_t dt)
+{
+   using namespace data::particle_flags;
+   static_assert(std::is_base_of<data::IAccessor, Accessor>::value, "please provide a valid accessor");
+   auto particle_flags = ac.getFlagsRef(j);
+   if (isSet(particle_flags, GLOBAL)){
+      WALBERLA_CHECK( isSet(particle_flags, FIXED), "Global bodies need to have infinite mass as they are not communicated!" );
+      initializeVelocityCorrections( ac, j, ac.getDvRef(j), ac.getDwRef(j), dt ); // use applied external forces to calculate starting velocity
+   }else{
+      initializeVelocityCorrections( ac, j, ac.getDvRef(j), ac.getDwRef(j), dt ); // use applied external forces to calculate starting velocity
+      if(!isSet(particle_flags, FIXED)){ // Update velocities with global acceleration and angular velocity with euler eqn
+         ac.getLinearVelocityRef(j) = ac.getLinearVelocity(j) + getGlobalAcceleration() * dt;
+         ac.getAngularVelocityRef(j) = ac.getAngularVelocity(j) + dt * ( ac.getInvInertia(j) * ( ( ac.getInertia(j) * ac.getAngularVelocity(j) ) % ac.getAngularVelocity(j) ) );
+      }
+   }
+}
+
+
+//*************************************************************************************************
+/*!\brief Calculates the initial velocity corrections of a given body.
+ * \param ac The particle accessor
+ * \param body The body whose velocities to time integrate
+ * \param dv On return the initial linear velocity correction.
+ * \param w On return the initial angular velocity correction.
+ * \param dt The time step size.
+ * \return void
+ *
+ * Calculates the velocity corrections effected by external forces and torques in an explicit Euler
+ * time integration of the velocities of the given body. For fixed objects the velocity corrections
+ * are set to zero. External forces and torques are reset if indicated by the settings.
+ */
+template <typename Accessor>
+inline void InitParticlesForHCSITS::initializeVelocityCorrections(Accessor& ac, size_t body, Vec3& dv, Vec3& dw, real_t dt ) const
+{
+   dv = ( ac.getInvMass(body) * dt ) * ac.getForce(body);
+   dw = dt * ( ac.getInvInertia(body) * ac.getTorque(body) );
+
+   ac.getForceRef(body) = Vec3();
+   ac.getTorqueRef(body) = Vec3();
+}
+//*************************************************************************************************
+
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
\ No newline at end of file
diff --git a/src/mesa_pd/kernel/IntegrateParticlesHCSITS.h b/src/mesa_pd/kernel/IntegrateParticlesHCSITS.h
new file mode 100644
index 0000000000000000000000000000000000000000..7cc75be73f78cba51ba5d78b29e037624710ae42
--- /dev/null
+++ b/src/mesa_pd/kernel/IntegrateParticlesHCSITS.h
@@ -0,0 +1,158 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file IntegrateParticlesHCSITS.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/common/ParticleFunctions.h>
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/IAccessor.h>
+#include <mesa_pd/data/ContactStorage.h>
+#include <mesa_pd/data/Flags.h>
+#include <core/logging/Logging.h>
+#include <core/math/Constants.h>
+
+#include <vector>
+
+namespace walberla {
+namespace mesa_pd {
+namespace kernel {
+
+/**
+ * Kernel performing integration of the particles after the HCSITS iteration is done.
+ * Call this kernel on all particles j to integrate them by a timestep of size dt.
+ * The Speed Limiter limits the number of body radii, a particle can travel in one timestep.
+ * The speed limit factor defines a number of radii that are allowed in a timestep, e.g. a factor of 1 means, that
+ * a particle can only travel one time its radius in each timestep.
+ *
+ * \ingroup mesa_pd_kernel
+ */
+class IntegrateParticlesHCSITS
+{
+   public:
+   // Default constructor sets the default values
+   IntegrateParticlesHCSITS() :
+   speedLimiterActive_( false ),
+   speedLimitFactor_( real_t(1.0) )
+   {}
+
+   // Getter and Setter Functions
+   inline const bool& getSpeedLimiterActive() const {return speedLimiterActive_;}
+   inline void setSpeedLimiterActive(bool v) { speedLimiterActive_ = v;}
+   
+   inline const real_t& getSpeedLimitFactor() const {return speedLimitFactor_;}
+   inline void setSpeedLimitFactor(real_t v) { speedLimitFactor_ = v;}
+   
+
+   template <typename PAccessor>
+   void operator()(size_t j, PAccessor& ac, real_t dt);
+
+   template <typename PAccessor>
+   void integratePositions(PAccessor& ac, size_t body, Vec3 v, Vec3 w, real_t dt ) const;
+
+
+   private:
+   // List properties
+   
+   bool speedLimiterActive_;
+   real_t speedLimitFactor_;
+};
+
+
+template <typename PAccessor>
+inline void IntegrateParticlesHCSITS::operator()(size_t j, PAccessor& ac, real_t dt)
+{
+   using namespace data::particle_flags;
+   static_assert(std::is_base_of<data::IAccessor, PAccessor>::value, "please provide a valid accessor");
+
+   auto body_flags = ac.getFlagsRef(j);
+   if (isSet(body_flags, FIXED)){
+      integratePositions(ac, j, ac.getLinearVelocity(j), ac.getAngularVelocity(j), dt );
+   }else{
+      integratePositions(ac, j, ac.getLinearVelocity(j) + ac.getDv(j), ac.getAngularVelocity(j) + ac.getDw(j), dt );
+   }
+}
+
+
+
+
+//*************************************************************************************************
+/*!\brief Time integration of the position and orientation of a given body.
+ *
+ * \param body The body whose position and orientation to time integrate
+ * \param v The linear velocity to use for time integration of the position.
+ * \param w The angular velocity to use for time integration of the orientation.
+ * \param dt The time step size.
+ * \return void
+ *
+ * Performs an Euler time integration of the positions of the given body. Velocities are damped if
+ * indicated by the settings and stored back in the body properties. The bounding box is
+ * recalculated and it is redetermined whether the body is awake or not. Also the data
+ * structure tracking the contacts attached to the body are cleared and
+ */
+template <typename PAccessor>
+inline void IntegrateParticlesHCSITS::integratePositions(PAccessor& ac, size_t body, Vec3 v, Vec3 w, real_t dt ) const {
+   using namespace data::particle_flags;
+   auto body_flags = ac.getFlags(body);
+   if (isSet(body_flags, FIXED))
+      return;
+
+
+   if (getSpeedLimiterActive()) {
+      const auto speed = v.length();
+      const auto edge = ac.getInteractionRadius(body);
+      if (speed * dt > edge * getSpeedLimitFactor()) {
+         v = v * (edge * getSpeedLimitFactor() / dt / speed);
+      }
+
+      const real_t maxPhi = real_t(2) * math::pi * getSpeedLimitFactor();
+      const real_t phi = w.length() * dt;
+      if (phi > maxPhi) {
+         w = w / phi * maxPhi;
+      }
+   }
+
+   // Calculating the translational displacement
+   ac.getPositionRef(body) = (ac.getPosition(body) + v * dt);
+
+   // Calculating the rotation angle
+   const Vec3 phi(w * dt);
+
+   // Calculating the new orientation
+   WALBERLA_ASSERT(!math::isnan(phi), " phi: " << phi << " w: " << w << " dt: " << dt );
+   ac.getRotationRef(body).rotate(phi, phi.length());
+
+   // Storing the velocities back in the body properties
+   ac.getLinearVelocityRef(body) = v;
+   ac.getAngularVelocityRef(body) = w;
+}
+//*************************************************************************************************
+
+
+
+} //namespace kernel
+} //namespace mesa_pd
+} //namespace walberla
\ No newline at end of file
diff --git a/src/mesa_pd/mpi/notifications/VelocityCorrectionNotification.h b/src/mesa_pd/mpi/notifications/VelocityCorrectionNotification.h
new file mode 100644
index 0000000000000000000000000000000000000000..dd4ca2a89d89251f35cca0dfef8750e452cf1b34
--- /dev/null
+++ b/src/mesa_pd/mpi/notifications/VelocityCorrectionNotification.h
@@ -0,0 +1,120 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file VelocityCorrectionNotification.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/mpi/notifications/NotificationType.h>
+#include <mesa_pd/mpi/notifications/reset.h>
+
+#include <core/mpi/Datatype.h>
+#include <core/mpi/RecvBuffer.h>
+#include <core/mpi/SendBuffer.h>
+
+namespace walberla {
+namespace mesa_pd {
+
+/**
+ * Transmits corrections of the linear and angular velocity (dv / dw) that were generated by the impulses acting on the
+ * ghost particles during application of the Hard-Contact-Solvers (HCSITS) to the main particle and adds them up.
+ * Use this Notification with ReduceProperty only.
+ */
+class VelocityCorrectionNotification
+{
+public:
+
+   struct Parameters
+   {
+      id_t uid_;
+      Vec3 dv_; /* Linear velocity correction */
+      Vec3 dw_; /* Angular velocity correction */
+   };
+
+   inline explicit VelocityCorrectionNotification( const data::Particle& p ) : p_(p)  {}
+
+   const data::Particle& p_;
+};
+
+
+// Reduce method for reduction (add up the velocity corrections)
+void reduce(data::Particle&& p, const VelocityCorrectionNotification::Parameters& objparam)
+{
+   p.getDvRef() += objparam.dv_;
+   p.getDwRef() += objparam.dw_;
+}
+
+template<>
+void reset<VelocityCorrectionNotification>(data::Particle& p )
+{
+   p.setDv( Vec3(real_t(0)) );
+   p.setDw( Vec3(real_t(0)) );
+}
+
+}  // namespace mesa_pd
+}  // namespace walberla
+
+//======================================================================================================================
+//
+//  Send/Recv Buffer Serialization Specialization
+//
+//======================================================================================================================
+
+namespace walberla {
+namespace mpi {
+
+template< typename T,    // Element type of SendBuffer
+          typename G>    // Growth policy of SendBuffer
+mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const mesa_pd::VelocityCorrectionNotification& obj )
+{
+   buf.addDebugMarker( "ft" );
+   buf << obj.p_.getUid();
+   buf << obj.p_.getDv();
+   buf << obj.p_.getDw();
+   return buf;
+}
+
+template< typename T>    // Element type  of RecvBuffer
+mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, mesa_pd::VelocityCorrectionNotification::Parameters& objparam )
+{
+   buf.readDebugMarker( "ft" );
+   buf >> objparam.uid_;
+   buf >> objparam.dv_;
+   buf >> objparam.dw_;
+   return buf;
+}
+
+template< >
+struct BufferSizeTrait< mesa_pd::VelocityCorrectionNotification > {
+   static const bool constantSize = true;
+   static const uint_t size = BufferSizeTrait<id_t>::size +
+                              BufferSizeTrait<mesa_pd::Vec3>::size +
+                              BufferSizeTrait<mesa_pd::Vec3>::size +
+                              mpi::BUFFER_DEBUG_OVERHEAD;
+};
+
+} // mpi
+} // walberla
diff --git a/src/mesa_pd/mpi/notifications/VelocityUpdateNotification.h b/src/mesa_pd/mpi/notifications/VelocityUpdateNotification.h
new file mode 100644
index 0000000000000000000000000000000000000000..7f8c1e7c733df07629bbbc4fe3540c19c7dc97d4
--- /dev/null
+++ b/src/mesa_pd/mpi/notifications/VelocityUpdateNotification.h
@@ -0,0 +1,135 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file VelocityCorrectionNotification.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+//======================================================================================================================
+//
+//  THIS FILE IS GENERATED - PLEASE CHANGE THE TEMPLATE !!!
+//
+//======================================================================================================================
+
+#pragma once
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/mpi/notifications/NotificationType.h>
+#include <mesa_pd/mpi/notifications/reset.h>
+#include <core/mpi/Datatype.h>
+#include <core/mpi/RecvBuffer.h>
+#include <core/mpi/SendBuffer.h>
+
+namespace walberla {
+namespace mesa_pd {
+
+/**
+ * Adds up LinearVelocity + Parameters::relaxationParam * velocity_correction in a particle and transmits the result to
+ * all its ghost particles.
+ * After adding up and sending the result, the velocity corrections of the main particle are reset to 0.
+ * After receiving new values, the velocity_corrections of the ghost_particles are reset to 0.
+ * This notification is used during relaxation with the HCSITS solver.
+ * Notification for use with BroadcastProperty only.
+ *
+ */
+class VelocityUpdateNotification
+{
+public:
+
+   struct Parameters
+   {
+      static real_t relaxationParam;
+      id_t uid_;
+      Vec3 v_; /* Linear velocity */
+      Vec3 w_; /* Angular velocity */
+   };
+
+   inline explicit VelocityUpdateNotification( data::Particle& p ) : p_(p)  {}
+
+   data::Particle& p_;
+};
+
+real_t VelocityUpdateNotification::Parameters::relaxationParam = real_t(0.8);
+
+// Update method for broadcast
+void update(data::Particle&& p, const VelocityUpdateNotification::Parameters& objparam) {
+   // Reset the velocity corrections dv/dw of ghost particle
+   p.getDvRef() = Vec3();
+   p.getDwRef() = Vec3();
+   p.getLinearVelocityRef() = objparam.v_;
+   p.getAngularVelocityRef() = objparam.w_;
+}
+
+template<>
+void reset<VelocityUpdateNotification>(data::Particle& p )
+{
+   p.setDv( Vec3(real_t(0)) );
+   p.setDw( Vec3(real_t(0)) );
+   p.setLinearVelocity( Vec3(real_t(0)) );
+   p.setAngularVelocity( Vec3(real_t(0)) );
+}
+
+}  // namespace mesa_pd
+}  // namespace walberla
+
+//======================================================================================================================
+//
+//  Send/Recv Buffer Serialization Specialization
+//
+//======================================================================================================================
+
+namespace walberla {
+namespace mpi {
+
+template< typename T,    // Element type of SendBuffer
+          typename G>    // Growth policy of SendBuffer
+mpi::GenericSendBuffer<T,G>& operator<<( mpi::GenericSendBuffer<T,G> & buf, const mesa_pd::VelocityUpdateNotification& obj )
+{
+   // Perform the addition
+   obj.p_.getLinearVelocityRef() = obj.p_.getLinearVelocity() + mesa_pd::VelocityUpdateNotification::Parameters::relaxationParam * obj.p_.getDv();
+   obj.p_.getAngularVelocityRef() = obj.p_.getAngularVelocity() + mesa_pd::VelocityUpdateNotification::Parameters::relaxationParam * obj.p_.getDw();
+   // Reset the velocity corrections dv/dw of main particle
+   obj.p_.getDvRef() = mesa_pd::Vec3();
+   obj.p_.getDwRef() = mesa_pd::Vec3();
+   buf.addDebugMarker( "ft" );
+   buf << obj.p_.getUid();
+   buf << obj.p_.getLinearVelocity();
+   buf << obj.p_.getAngularVelocity();
+   return buf;
+}
+
+template< typename T>    // Element type  of RecvBuffer
+mpi::GenericRecvBuffer<T>& operator>>( mpi::GenericRecvBuffer<T> & buf, mesa_pd::VelocityUpdateNotification::Parameters& objparam )
+{
+   buf.readDebugMarker( "ft" );
+   buf >> objparam.uid_;
+   buf >> objparam.v_;
+   buf >> objparam.w_;
+   return buf;
+}
+
+template< >
+struct BufferSizeTrait< mesa_pd::VelocityUpdateNotification > {
+   static const bool constantSize = true;
+   static const uint_t size = BufferSizeTrait<id_t>::size +
+                              BufferSizeTrait<mesa_pd::Vec3>::size +
+                              BufferSizeTrait<mesa_pd::Vec3>::size +
+                              mpi::BUFFER_DEBUG_OVERHEAD;
+};
+
+} // mpi
+} // walberla
diff --git a/tests/mesa_pd/CMakeLists.txt b/tests/mesa_pd/CMakeLists.txt
index 69278f7430280644ef08cbf5226db64477972ead..6fbc2d0259ae524ff24fda49b1b00e2366ba3a18 100644
--- a/tests/mesa_pd/CMakeLists.txt
+++ b/tests/mesa_pd/CMakeLists.txt
@@ -105,6 +105,9 @@ waLBerla_execute_test( NAME   MESA_PD_Kernel_GenerateAnalyticContacts PROCESSES
 waLBerla_compile_test( NAME   MESA_PD_Kernel_GenerateLinkedCells FILES kernel/GenerateLinkedCells.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Kernel_GenerateLinkedCells )
 
+waLBerla_compile_test( NAME   MESA_PD_Kernel_HCSITSKernels FILES kernel/HCSITSKernels.cpp DEPENDS core )
+waLBerla_execute_test( NAME   MESA_PD_Kernel_HCSITSKernels )
+
 waLBerla_compile_test( NAME   MESA_PD_Kernel_HeatConduction FILES kernel/HeatConduction.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Kernel_HeatConduction )
 
@@ -158,6 +161,9 @@ waLBerla_execute_test( NAME   MESA_PD_MPI_ReduceContactHistory PROCESSES 8 )
 waLBerla_compile_test( NAME   MESA_PD_MPI_ReduceProperty FILES mpi/ReduceProperty.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_MPI_ReduceProperty PROCESSES 8 )
 
+waLBerla_compile_test( NAME   MESA_PD_MPI_VelocityCorrectionNotification FILES mpi/VelocityCorrectionNotification.cpp DEPENDS core )
+waLBerla_execute_test( NAME   MESA_PD_MPI_VelocityCorrectionNotification PROCESSES 8)
+
 waLBerla_compile_test( NAME   MESA_PD_Sorting FILES Sorting.cpp DEPENDS core )
 waLBerla_execute_test( NAME   MESA_PD_Sorting )
 
diff --git a/tests/mesa_pd/kernel/HCSITSKernels.cpp b/tests/mesa_pd/kernel/HCSITSKernels.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2977c398d114905ad0623fd4057b6ba069179837
--- /dev/null
+++ b/tests/mesa_pd/kernel/HCSITSKernels.cpp
@@ -0,0 +1,485 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   HCSITSKernels.cpp
+//! \author Tobias Leemann <tobias.leemann@fau.de>
+//
+//======================================================================================================================
+
+
+/** Test Collision Detection and Insertion of contacts into the contact storage */
+
+#include <mesa_pd/data/DataTypes.h>
+#include <mesa_pd/data/ContactStorage.h>
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/data/ShapeStorage.h>
+#include <mesa_pd/kernel/DetectAndStoreContacts.h>
+#include <mesa_pd/domain/InfiniteDomain.h>
+#include <mesa_pd/data/ContactAccessor.h>
+#include <mesa_pd/kernel/ParticleSelector.h>
+//HCSITS Kernels
+
+#include "mesa_pd/kernel/InitContactsForHCSITS.h"
+#include "mesa_pd/kernel/InitParticlesForHCSITS.h"
+#include "mesa_pd/kernel/HCSITSRelaxationStep.h"
+#include "mesa_pd/kernel/IntegrateParticlesHCSITS.h"
+
+// Communication kernels
+#include "mesa_pd/mpi/ReduceProperty.h"
+#include "mesa_pd/mpi/BroadcastProperty.h"
+
+#include "mesa_pd/mpi/notifications/VelocityUpdateNotification.h"
+#include "mesa_pd/mpi/notifications/VelocityCorrectionNotification.h"
+
+#include <mesa_pd/data/ParticleAccessor.h>
+#include <core/Environment.h>
+#include <core/logging/Logging.h>
+
+#include <iostream>
+
+namespace walberla {
+
+using namespace walberla::mesa_pd;
+
+   class ParticleAccessorWithShape : public data::ParticleAccessor
+   {
+      public:
+      ParticleAccessorWithShape(std::shared_ptr<data::ParticleStorage>& ps, std::shared_ptr<data::ShapeStorage>& ss)
+              : ParticleAccessor(ps)
+              , ss_(ss)
+      {}
+
+      const real_t& getMass(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getMass();}
+      const real_t& getInvMass(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInvMass();}
+
+      const Mat3& getInertia(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInertiaBF();}
+      const Mat3& getInvInertia(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)]->getInvInertiaBF();}
+
+      data::BaseShape* getShape(const size_t p_idx) const {return ss_->shapes[ps_->getShapeIDRef(p_idx)].get();}
+      private:
+      std::shared_ptr<data::ShapeStorage> ss_;
+   };
+
+   template<typename PStorage, typename CStorage, typename PAccessor, typename CAccessor>
+   class TestHCSITSKernel {
+      public:
+      TestHCSITSKernel(PStorage &ps_, CStorage &cs_, PAccessor &pa_, CAccessor &ca_) : ps(ps_), cs(cs_), pa(pa_), ca(ca_),
+            erp(real_t(1.0)), model(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticFrictionlessContact), contactThreshold(0), globalAcc(0) {}
+
+      void operator()(real_t dt){
+         // Perform Collision detection (call kernel, that stores contacts into cs)
+         kernel::DetectAndStoreContacts detectAndStore(cs);
+         cs.clear();
+
+         domain::InfiniteDomain domain;
+         collision_detection::AnalyticContactDetection acd;
+         acd.getContactThreshold() = contactThreshold;
+         ps.forEachParticlePairHalf(false, kernel::ExcludeInfiniteInfinite(), pa, detectAndStore, pa, domain, acd);
+
+         // Create Kernels
+         kernel::InitContactsForHCSITS initContacts(1);
+         initContacts.setFriction(0,0,real_t(0.2));
+         initContacts.setErp(real_t(erp));
+
+         kernel::InitParticlesForHCSITS initParticles;
+         initParticles.setGlobalAcceleration(globalAcc);
+
+         kernel::HCSITSRelaxationStep relaxationStep;
+         relaxationStep.setRelaxationModel(model);
+         relaxationStep.setCor(real_t(0.6)); // Only effective for PGSM
+
+         kernel::IntegrateParticlesHCSITS integration;
+
+         mesa_pd::mpi::ReduceProperty reductionKernel;
+         mesa_pd::mpi::BroadcastProperty broadcastKernel;
+
+         // Run the HCSITS loop
+         cs.forEachContact(false, kernel::SelectAll(), ca, initContacts, ca, pa);
+         ps.forEachParticle(false, kernel::SelectAll(), pa, initParticles, pa, dt);
+
+         VelocityUpdateNotification::Parameters::relaxationParam = real_t(1.0);
+         reductionKernel.operator()<VelocityCorrectionNotification>(ps);
+         broadcastKernel.operator()<VelocityUpdateNotification>(ps);
+
+         VelocityUpdateNotification::Parameters::relaxationParam = real_t(0.8);
+         for(int i = 0; i < 10; i++){
+            cs.forEachContact(false, kernel::SelectAll(), ca, relaxationStep, ca, pa, dt);
+            reductionKernel.operator()<VelocityCorrectionNotification>(ps);
+            broadcastKernel.operator()<VelocityUpdateNotification>(ps);
+         }
+         ps.forEachParticle(false, kernel::SelectAll(), pa, integration, pa, dt);
+      }
+
+      private:
+         PStorage  &ps;
+         CStorage  &cs;
+         PAccessor &pa;
+         CAccessor &ca;
+
+      public:
+         real_t erp;
+         kernel::HCSITSRelaxationStep::RelaxationModel model;
+         real_t contactThreshold;
+         Vec3 globalAcc;
+   };
+
+
+void normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel model)
+{
+   //init data structures
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   auto cs = std::make_shared<data::ContactStorage>(100);
+   auto ss = std::make_shared<data::ShapeStorage>();
+   ParticleAccessorWithShape paccessor(ps, ss);
+   data::ContactAccessor caccessor(cs);
+   auto density = real_t(7.874);
+   auto radius = real_t(1.1);
+
+   //Geometries for sphere and half space.
+   auto smallSphere = ss->create<data::Sphere>( radius );
+   auto halfSpace = ss->create<data::HalfSpace>(Vec3(0,0,1));
+
+   ss->shapes[halfSpace]->updateMassAndInertia( density );
+   ss->shapes[smallSphere]->updateMassAndInertia( density );
+
+   // Create four slightly overlapping spheres in a row (located at x=0,2)
+   auto p = ps->create();
+   p->getPositionRef()        = Vec3(real_t(0), real_t(0), real_t(0));
+   p->getShapeIDRef()         = smallSphere;
+   p->getOwnerRef()           = walberla::mpi::MPIManager::instance()->rank();
+   p->getLinearVelocityRef()  = Vec3(real_t(5), real_t(5), real_t(6));
+   p->getTypeRef()            = 0;
+   //WALBERLA_LOG_INFO(paccessor.ParticleAccessorWithShape::getInvMass(0));
+
+
+   auto p2 = ps->create();
+   p2->getPositionRef()          = Vec3(real_t(5), real_t(5), real_t(5));
+   p2->getShapeIDRef()           = halfSpace;
+   p2->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p2->getTypeRef()               = 0;
+   data::particle_flags::set(p2->getFlagsRef(), data::particle_flags::FIXED);
+   data::particle_flags::set(p2->getFlagsRef(), data::particle_flags::GLOBAL);
+
+   TestHCSITSKernel<data::ParticleStorage, data::ContactStorage, ParticleAccessorWithShape, data::ContactAccessor> testHCSITS(*ps, *cs, paccessor, caccessor);
+   testHCSITS.model = model;
+
+   WALBERLA_LOG_INFO(paccessor.getInvMass(0))
+   WALBERLA_LOG_INFO(paccessor.getInvMass(1))
+
+   // plane at 5,5,5
+   // radius 1.1
+   p->setPosition(  Vec3(5,5,6) );
+   p->setLinearVelocity( Vec3(0,0,0) );
+   testHCSITS( real_c( real_t(1.0) ) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.1)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(0.1)) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+
+   p->setPosition(  Vec3(5,5,6) );
+   p->setLinearVelocity( Vec3(0,0,0) );
+   testHCSITS.erp = real_t(0.5) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.05)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(0.05)) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+
+   p->setPosition(  Vec3(5,5,6) );
+   p->setLinearVelocity( Vec3(0,0,0) );
+   testHCSITS.erp = real_t(0.0) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,6) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,0) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+
+   p->setPosition(  Vec3(5,5,6) );
+   p->setLinearVelocity( Vec3(0,0,-1) );
+   testHCSITS.erp = real_t(1.0) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.1)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(0.1)) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+
+   p->setPosition(  Vec3(5,5,6) );
+   p->setLinearVelocity( Vec3(0,0,-1) );
+   testHCSITS.erp = real_t(0.5) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.05)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(0.05)) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+
+   p->setPosition(  Vec3(5,5,6) );
+   p->setLinearVelocity( Vec3(0,0,-1) );
+   testHCSITS.erp = real_t(0.0) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,6) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,0) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+
+   // No collision
+   p->setPosition(  Vec3(5,5,real_t(6.2)) );
+   p->setLinearVelocity( Vec3(0,0,real_t(-0.2)) );
+   testHCSITS.erp = real_t(1.0) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.0)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(-0.2)) );
+
+   testHCSITS.contactThreshold = real_t(1.0);
+   p->setPosition(  Vec3(5,5,real_t(6.2)) );
+   p->setLinearVelocity( Vec3(0,0,real_t(-0.2)) );
+   testHCSITS.erp = real_t(1.0) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.1)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(-0.1)) );
+
+   p->setPosition(  Vec3(5,5,real_t(6.1)) );
+   p->setLinearVelocity( Vec3(0,0,real_t(-0.1)) );
+   testHCSITS.erp = real_t(1.0) ;
+   testHCSITS( real_t(1.0) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getPosition() , Vec3(5,5,real_t(6.1)) );
+   WALBERLA_CHECK_FLOAT_EQUAL( p->getLinearVelocity(), Vec3(0,0,real_t(0)) );
+   WALBERLA_LOG_INFO(p->getPosition());
+   WALBERLA_LOG_INFO(p->getLinearVelocity());
+}
+
+
+/**Check hard contact constraints on two overlapping, colliding spheres
+ * Works only for the solvers that really archieve seperation after a single
+ * timestep. Use SphereSeperationTest to check for seperation after multiple
+ * timesteps.
+ * @param model The collision model to use.
+ * */
+void SphereSphereTest(kernel::HCSITSRelaxationStep::RelaxationModel model){
+
+      //init data structures
+      auto ps = std::make_shared<data::ParticleStorage>(100);
+      auto cs = std::make_shared<data::ContactStorage>(100);
+      auto ss = std::make_shared<data::ShapeStorage>();
+      ParticleAccessorWithShape paccessor(ps, ss);
+      data::ContactAccessor caccessor(cs);
+      auto density = real_t(7.874);
+      auto radius = real_t(1.1);
+
+      auto smallSphere = ss->create<data::Sphere>( radius );
+      ss->shapes[smallSphere]->updateMassAndInertia( density );
+
+      auto dt = real_t(1);
+
+      // Create two slightly overlapping spheres in a row (located at x=0,2)
+      auto p = ps->create();
+      p->getPositionRef()          = Vec3(real_t(0), real_t(0), real_t(0));
+      p->getShapeIDRef()           = smallSphere;
+      p->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+      p->getLinearVelocityRef()    = Vec3(real_t(1), real_t(0), real_t(0));
+      p->getTypeRef()              = 0;
+      auto p2 = ps->create();
+      p2->getPositionRef()          = Vec3(real_t(2), real_t(0), real_t(0));
+      p2->getShapeIDRef()           = smallSphere;
+      p2->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+      p2->getLinearVelocityRef() = Vec3(real_t(-1), real_t(0), real_t(0));
+      p2->getTypeRef()              = 0;
+      TestHCSITSKernel<data::ParticleStorage, data::ContactStorage, ParticleAccessorWithShape, data::ContactAccessor> testHCSITS(*ps, *cs, paccessor, caccessor);
+      testHCSITS.model = model;
+      testHCSITS(dt);
+
+      WALBERLA_CHECK_FLOAT_EQUAL(p->getPosition(), Vec3(real_t(-0.1),0,0));
+      WALBERLA_CHECK_FLOAT_EQUAL(p->getLinearVelocity(), Vec3(real_t(-0.1),0,0));
+      WALBERLA_CHECK_FLOAT_EQUAL(p->getAngularVelocity(), Vec3(0,0,0));
+      WALBERLA_CHECK_FLOAT_EQUAL(p2->getPosition(), Vec3(real_t(2.1),0,0));
+      WALBERLA_CHECK_FLOAT_EQUAL(p2->getLinearVelocity(), Vec3(real_t(0.1),0,0))
+      WALBERLA_CHECK_FLOAT_EQUAL(p2->getAngularVelocity(), Vec3(0,0,0));
+
+      WALBERLA_LOG_INFO(p->getPosition());
+      WALBERLA_LOG_INFO(p->getLinearVelocity());
+      WALBERLA_LOG_INFO(p2->getPosition());
+      WALBERLA_LOG_INFO(p2->getLinearVelocity());
+}
+
+/**
+ * Create two overlapping spheres with opposing velocities, that are in contact.
+ * Assert that the collision is resolved after a certain (10) number of timesteps.
+ * @param model The collision model to use.
+ */
+void SphereSeperationTest(kernel::HCSITSRelaxationStep::RelaxationModel model){
+
+   //init data structures
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   auto cs = std::make_shared<data::ContactStorage>(100);
+   auto ss = std::make_shared<data::ShapeStorage>();
+   ParticleAccessorWithShape paccessor(ps, ss);
+   data::ContactAccessor caccessor(cs);
+   auto density = real_t(7.874);
+   auto radius = real_t(1.1);
+
+   auto smallSphere = ss->create<data::Sphere>( radius );
+
+   ss->shapes[smallSphere]->updateMassAndInertia( density );
+   auto dt = real_t(0.2);
+
+   // Create two slightly overlapping spheres in a row (located at x=0,2)
+   auto p = ps->create();
+   p->getPositionRef()          = Vec3(real_t(0), real_t(0), real_t(0));
+   p->getShapeIDRef()           = smallSphere;
+   p->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p->getLinearVelocityRef()    = Vec3(real_t(1), real_t(0), real_t(0));
+   p->getTypeRef()              = 0;
+   auto p2 = ps->create();
+   p2->getPositionRef()          = Vec3(real_t(2.0), real_t(0), real_t(0));
+   p2->getShapeIDRef()           = smallSphere;
+   p2->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p2->getLinearVelocityRef()    = Vec3(real_t(-1), real_t(0), real_t(0));
+   p2->getTypeRef()              = 0;
+   TestHCSITSKernel<data::ParticleStorage, data::ContactStorage, ParticleAccessorWithShape, data::ContactAccessor> testHCSITS(*ps, *cs, paccessor, caccessor);
+
+   int solveCount = 0;
+   testHCSITS.model = model;
+
+   // Number of allowed iterations
+   int maxIter = 10;
+   while(p2->getPosition()[0]-p->getPosition()[0] < 2.2){
+      testHCSITS(dt);
+      WALBERLA_LOG_INFO(p->getPosition());
+      WALBERLA_LOG_INFO(p->getLinearVelocity());
+      WALBERLA_LOG_INFO(p2->getPosition());
+      WALBERLA_LOG_INFO(p2->getLinearVelocity());
+      solveCount ++;
+      if(solveCount==maxIter){
+         WALBERLA_CHECK(false, "Seperation did not occur after " << maxIter << " Iterations performed.");
+      }
+   }
+   WALBERLA_LOG_INFO("Seperation achieved after " << solveCount << " iterations.");
+}
+
+/**
+ * Create a sphere that slides on a plane with only linear velocity. It is
+ * put into a spin by the frictional reactions, until it rolls with no slip.
+ * Assert that the sphere rolls with all slip resolved and at the physically correct
+ * speed after a certain number of timesteps. Use only with solvers that obey the coulomb friction law.
+ * @param model The collision model to use.
+ */
+void SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel model){
+
+   //init data structures
+   auto ps = std::make_shared<data::ParticleStorage>(100);
+   auto cs = std::make_shared<data::ContactStorage>(100);
+   auto ss = std::make_shared<data::ShapeStorage>();
+   ParticleAccessorWithShape paccessor(ps, ss);
+   data::ContactAccessor caccessor(cs);
+   auto density = real_t(1);
+   auto radius = real_t(1);
+
+   auto smallSphere = ss->create<data::Sphere>( radius );
+   auto halfSpace = ss->create<data::HalfSpace>(Vec3(0,0,1));
+   ss->shapes[smallSphere]->updateMassAndInertia( density );
+   ss->shapes[halfSpace]->updateMassAndInertia( density );
+   auto dt = real_t(0.002);
+
+   // Create a spheres (located at x=0, height = 1)
+   auto p = ps->create();
+   p->getPositionRef()          = Vec3(real_t(0), real_t(0), real_t(1));
+   p->getShapeIDRef()           = smallSphere;
+   p->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p->getLinearVelocityRef()    = Vec3(real_t(5), real_t(0), real_t(0));
+   p->getTypeRef()              = 0;
+
+   auto p2 = ps->create();
+   p2->getPositionRef()          = Vec3(real_t(0), real_t(0), real_t(0));
+   p2->getShapeIDRef()           = halfSpace;
+   p2->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+   p2->getTypeRef()               = 0;
+   data::particle_flags::set(p2->getFlagsRef(), data::particle_flags::FIXED);
+   data::particle_flags::set(p2->getFlagsRef(), data::particle_flags::GLOBAL);
+
+   TestHCSITSKernel<data::ParticleStorage, data::ContactStorage, ParticleAccessorWithShape, data::ContactAccessor> testHCSITS(*ps, *cs, paccessor, caccessor);
+   testHCSITS.model = model;
+   testHCSITS.globalAcc = Vec3(0,0,-10);
+   int solveCount = 0;
+
+
+   // Number of allowed iterations
+   int maxIter = 500;
+   while(!walberla::floatIsEqual(p->getAngularVelocity()[1],p->getLinearVelocity()[0], real_t(0.002))){
+      testHCSITS(dt);
+      if(solveCount % 50 == 0){
+         WALBERLA_LOG_INFO(p->getAngularVelocity());
+         WALBERLA_LOG_INFO(p->getLinearVelocity());
+      }
+      solveCount ++;
+      if(solveCount==maxIter){
+         WALBERLA_CHECK(false, "End of slip did not occur after " << maxIter << " Iterations performed.");
+      }
+   }
+   WALBERLA_LOG_INFO("Rolling with no slip achieved after " << solveCount << " iterations.");
+
+   // Check if the value obtained values equal the physically correct values
+   // (which can be determined by newtons equation to be 25/7).
+   WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(real_t(25.0/7.0), p->getAngularVelocity()[1], real_t(0.01), "Angular velocity is not physically correct");
+   WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(real_t(25.0/7.0), p->getLinearVelocity()[0], real_t(0.01), "Linear velocity is not physically correct");
+}
+
+int main( int argc, char ** argv )
+{
+   Environment env(argc, argv);
+   WALBERLA_UNUSED(env);
+
+   walberla::mpi::MPIManager::instance()->useWorldComm();
+
+   WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - START ***");
+
+
+   WALBERLA_LOG_INFO_ON_ROOT("InelasticFrictionlessContact");
+   normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticFrictionlessContact);
+   SphereSphereTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticFrictionlessContact);
+
+   WALBERLA_LOG_INFO_ON_ROOT("ApproximateInelasticCoulombContactByDecoupling");
+   normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::ApproximateInelasticCoulombContactByDecoupling);
+   SphereSphereTest(kernel::HCSITSRelaxationStep::RelaxationModel::ApproximateInelasticCoulombContactByDecoupling);
+   SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::ApproximateInelasticCoulombContactByDecoupling);
+
+   WALBERLA_LOG_INFO_ON_ROOT("InelasticCoulombContactByDecoupling");
+   normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticCoulombContactByDecoupling);
+   SphereSphereTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticCoulombContactByDecoupling);
+   SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticCoulombContactByDecoupling);
+
+   WALBERLA_LOG_INFO_ON_ROOT("InelasticGeneralizedMaximumDissipationContact");
+   normalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticGeneralizedMaximumDissipationContact);
+   SphereSphereTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticGeneralizedMaximumDissipationContact);
+   SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticGeneralizedMaximumDissipationContact);
+
+   WALBERLA_LOG_INFO_ON_ROOT("InelasticCoulombContactByOrthogonalProjections");
+   SphereSeperationTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticCoulombContactByOrthogonalProjections);
+   SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticCoulombContactByOrthogonalProjections);
+
+   WALBERLA_LOG_INFO_ON_ROOT("ApproximateInelasticCoulombContactByOrthogonalProjections");
+   SphereSeperationTest(kernel::HCSITSRelaxationStep::RelaxationModel::ApproximateInelasticCoulombContactByOrthogonalProjections);
+   SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::ApproximateInelasticCoulombContactByOrthogonalProjections);
+
+   WALBERLA_LOG_INFO_ON_ROOT("InelasticProjectedGaussSeidel");
+   SphereSeperationTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticProjectedGaussSeidel);
+   SlidingSphereFrictionalReactionTest(kernel::HCSITSRelaxationStep::RelaxationModel::InelasticProjectedGaussSeidel);
+   return EXIT_SUCCESS;
+}
+
+} //namespace walberla
+
+int main( int argc, char ** argv )
+{
+   return walberla::main(argc, argv);
+}
diff --git a/tests/mesa_pd/mpi/VelocityCorrectionNotification.cpp b/tests/mesa_pd/mpi/VelocityCorrectionNotification.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..79cd6351519295c35e4ab62dd7106537fdc3752e
--- /dev/null
+++ b/tests/mesa_pd/mpi/VelocityCorrectionNotification.cpp
@@ -0,0 +1,135 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file   VelocityCorrectionNotification.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+
+#include <blockforest/BlockForest.h>
+#include <blockforest/Initialization.h>
+#include <core/Environment.h>
+#include <core/logging/Logging.h>
+
+#include <mesa_pd/domain/BlockForestDomain.h>
+
+#include <mesa_pd/data/ParticleStorage.h>
+#include <mesa_pd/data/ShapeStorage.h>
+
+#include <mesa_pd/mpi/SyncNextNeighbors.h>
+#include <mesa_pd/mpi/BroadcastProperty.h>
+#include <mesa_pd/mpi/ReduceProperty.h>
+
+#include <mesa_pd/mpi/notifications/VelocityCorrectionNotification.h>
+#include <mesa_pd/mpi/notifications/VelocityUpdateNotification.h>
+
+#include <iostream>
+
+namespace walberla {
+
+int main( int argc, char ** argv )
+{
+
+   using namespace mesa_pd;
+
+   Environment env(argc, argv);
+   WALBERLA_UNUSED(env);
+   int np = walberla::mpi::MPIManager::instance()->numProcesses();
+
+   // create blocks
+   shared_ptr< blockforest::BlockForest > forest = blockforest::createBlockForest(
+           math::AABB(-10,-10,-10, 10, 10, 10),
+           Vector3<uint_t>(2,2,2),
+           Vector3<bool>(false, false, false));
+
+   domain::BlockForestDomain domain(forest);
+
+   //init data structures
+   data::ParticleStorage ps(10);
+
+   //initialize particle
+   const auto linVel = Vec3(1,2,3);
+   const auto angVel = Vec3(-1,-2,-3);
+
+   Vec3 pt(real_t(0.1), real_t(0.1),real_t(0.1));
+
+   for (auto& iBlk : *forest)
+   {
+      if(iBlk.getAABB().contains(pt)) {
+         auto p                       = ps.create();
+         p->getPositionRef()          = pt;
+         p->getInteractionRadiusRef() = real_t(1.0);
+         p->getOwnerRef()             = walberla::mpi::MPIManager::instance()->rank();
+         p->getTypeRef()              = 0;
+         p->getLinearVelocityRef()    = linVel;
+         p->getAngularVelocityRef()    = angVel;
+         WALBERLA_LOG_INFO("Particle created.");
+      }
+   }
+
+   // Sync (create ghost particles on the other processes)
+   mesa_pd::mpi::SyncNextNeighbors snn;
+   snn(ps, domain);
+
+   auto relax_param = real_t(0.8);
+   VelocityUpdateNotification::Parameters::relaxationParam = relax_param;
+   mesa_pd::mpi::ReduceProperty reductionKernel;
+   mesa_pd::mpi::BroadcastProperty broadcastKernel;
+
+   WALBERLA_LOG_INFO("Particle-Storage size: " << ps.size());
+
+   // Reduce dv
+   // dv per process
+   auto dvprocess = Vec3(real_t(0.1),real_t(0.1),real_t(0.1));
+   auto dwprocess = Vec3(real_t(0.05),real_t(0.1),real_t(0.15));
+   ps.setDv(0, dvprocess);
+   ps.setDw(0, dwprocess);
+   reductionKernel.operator()<VelocityCorrectionNotification>(ps);
+
+   for (auto& iBlk : *forest)
+   {
+      if(iBlk.getAABB().contains(pt)) {
+         WALBERLA_CHECK_FLOAT_EQUAL(ps.getLinearVelocity(0), linVel);
+         WALBERLA_CHECK_FLOAT_EQUAL(ps.getAngularVelocity(0), angVel);
+         WALBERLA_CHECK_FLOAT_EQUAL(ps.getDv(0), real_t(np) * dvprocess);
+         WALBERLA_CHECK_FLOAT_EQUAL(ps.getDw(0), real_t(np) * dwprocess);
+      }
+   }
+
+   broadcastKernel.operator()<VelocityUpdateNotification>(ps);
+
+
+   // Broadcast v
+   reductionKernel.operator()<VelocityCorrectionNotification>(ps);
+   if(np > 1){
+      WALBERLA_CHECK_FLOAT_EQUAL(ps.getLinearVelocity(0), linVel + relax_param * real_t(np) * dvprocess);
+      WALBERLA_CHECK_FLOAT_EQUAL(ps.getAngularVelocity(0), angVel + relax_param * real_t(np) * dwprocess);
+      WALBERLA_CHECK_FLOAT_EQUAL(ps.getDv(0), Vec3());
+      WALBERLA_CHECK_FLOAT_EQUAL(ps.getDw(0), Vec3());
+   }else{
+      WALBERLA_CHECK_FLOAT_EQUAL(ps.getLinearVelocity(0) + ps.getDv(0), linVel + real_t(np) * dvprocess);
+      WALBERLA_CHECK_FLOAT_EQUAL(ps.getAngularVelocity(0) + ps.getDw(0), angVel + real_t(np) * dwprocess);
+   }
+
+   return EXIT_SUCCESS;
+}
+
+} //namespace walberla
+
+int main( int argc, char ** argv )
+{
+   return walberla::main(argc, argv);
+}