diff --git a/src/lbm/SuViscoelasticity.h b/src/lbm/SuViscoelasticity.h
new file mode 100644
index 0000000000000000000000000000000000000000..493a3fa4987fb27d911317e93ee7686d5d1142e3
--- /dev/null
+++ b/src/lbm/SuViscoelasticity.h
@@ -0,0 +1,267 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SuViscoelasticity.h
+//! \ingroup lbm
+//! \author Cameron Stewart <cstewart@icp.uni-stuttgart.de>
+//! \brief Oldroyd-B viscoelasticity extended from Su et al. Phys. Rev. E, 2013
+//
+//======================================================================================================================
+
+
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "core/math/Matrix3.h"
+#include <field/AddToStorage.h>
+#include "field/GhostLayerField.h"
+#include "field/communication/PackInfo.h"
+#include "lbm/field/PdfField.h"
+
+namespace walberla {
+namespace lbm {
+namespace viscoelastic {
+
+template < typename LatticeModel_T , typename BoundaryHandling_T>
+class Su {
+
+public:
+            
+   typedef GhostLayerField< Matrix3<real_t>, 1> StressField_T;
+   typedef GhostLayerField< Vector3<real_t>, 1> VelocityField_T;
+   typedef GhostLayerField< Vector3<real_t>, 1> ForceField_T;
+   typedef PdfField< LatticeModel_T > PdfField_T;
+   typedef shared_ptr< StructuredBlockForest > Blocks_T;
+
+
+   Su( Blocks_T blocks, BlockDataID force, BlockDataID pdfId, BlockDataID boundaryHandlingId, BlockDataID stressId, BlockDataID stressOldId, real_t lambda_p, real_t eta_p,
+       uint_t period = uint_c(1), bool compressibleFlag = false): blocks_( blocks ), forceId_( force ), pdfId_(pdfId), boundaryHandlingId_(boundaryHandlingId), stressId_(stressId), stressOldId_(stressOldId),
+                                   inv_lambda_p_( real_c(1.0)/lambda_p ), eta_p_( eta_p ), delta_t_( real_c(period) ), executionCount_( 0 ), communicateStress_(blocks), 
+                                   communicateVelocities_(blocks), compressibleFlag_(compressibleFlag)
+   {
+      // create velocity fields
+      velocityId_ = walberla::field::addToStorage<VelocityField_T>( blocks_, "Velocity Field", Vector3<real_t>(0.0), field::zyxf, uint_c(1));
+
+      // stress and velocity communication scheme
+      communicateStress_.addPackInfo( make_shared< field::communication::PackInfo<StressField_T> >( stressId_ ));
+      communicateVelocities_.addPackInfo( make_shared< field::communication::PackInfo<VelocityField_T> >( velocityId_ ));
+
+      WALBERLA_ASSERT_GREATER_EQUAL(delta_t_, real_c(1.0));
+   }
+
+   Su( Blocks_T blocks, BlockDataID force, BlockDataID pdfId, BlockDataID boundaryHandlingId, BlockDataID stressId, BlockDataID stressOldId, BlockDataID velocityId, real_t lambda_p, real_t eta_p,
+       uint_t period = uint_c(1), bool compressibleFlag = false): blocks_( blocks ), forceId_( force ), pdfId_(pdfId), boundaryHandlingId_(boundaryHandlingId), stressId_(stressId), stressOldId_(stressOldId),
+                                   velocityId_(velocityId), inv_lambda_p_( real_c(1.0)/lambda_p ), eta_p_( eta_p ), delta_t_( real_c(period) ), executionCount_( 0 ), communicateStress_(blocks), 
+                                   communicateVelocities_(blocks), compressibleFlag_(compressibleFlag)
+   {
+      // stress and velocity communication scheme
+      communicateStress_.addPackInfo( make_shared< field::communication::PackInfo<StressField_T> >( stressId_ ));
+      communicateVelocities_.addPackInfo( make_shared< field::communication::PackInfo<VelocityField_T> >( velocityId_ ));
+
+      WALBERLA_ASSERT_GREATER_EQUAL(delta_t_, real_c(1.0));
+   }
+
+   void calculateStresses(IBlock * block) {
+      StressField_T *stressNew = block->getData<StressField_T>(stressId_);
+      StressField_T *stressOld = block->getData<StressField_T>(stressOldId_);
+      VelocityField_T *velocity = block->getData<VelocityField_T>(velocityId_);
+      PdfField_T *pdf = block->getData<PdfField_T>(pdfId_);
+      BoundaryHandling_T * boundaryHandling = block->getData< BoundaryHandling_T >( boundaryHandlingId_ );
+
+      WALBERLA_ASSERT_GREATER_EQUAL(stressOld->nrOfGhostLayers(), 2);
+      WALBERLA_ASSERT_GREATER_EQUAL(velocity->nrOfGhostLayers(), 1);
+
+      WALBERLA_FOR_ALL_CELLS_XYZ(stressNew, {
+         Cell cell(x,y,z);
+         Matrix3<real_t> stress1 = Matrix3<real_t>(0.0);
+         Matrix3<real_t> stress2 = Matrix3<real_t>(0.0);
+         Matrix3<real_t> stress3 = Matrix3<real_t>(0.0);
+         Matrix3<real_t> relstr  = Matrix3<real_t>(0.0);
+         bool nearBoundaryFlag = false;
+
+         // if cell is a fluid cell then calculate the stress tensor
+         if (boundaryHandling->isDomain(cell)) {
+            // check if near a wall
+            if (boundaryHandling->isNearBoundary(cell)) {
+               nearBoundaryFlag = true;
+            }
+            else {
+               for (auto d = LatticeModel_T::Stencil::beginNoCenter(); d != LatticeModel_T::Stencil::end(); ++d) {
+                  Cell cell1 = cell - *d;
+                  if (boundaryHandling->isNearBoundary(cell1)) {
+                     nearBoundaryFlag = true;
+                  }
+               }
+            }
+            for (auto d = LatticeModel_T::Stencil::beginNoCenter(); d != LatticeModel_T::Stencil::end(); ++d) {
+               Cell cell1 = cell - *d;
+               Cell cell2 = cell1 - *d;
+               Cell cell3 = cell + *d;
+
+               // check if using compressible of incompressible algorithm
+               if (compressibleFlag_) {
+                  if (nearBoundaryFlag) {
+                     if (boundaryHandling->isDomain(cell1)) {
+                        stress1 += (stressOld->get(cell1) * pdf->get(cell, d.toIdx())) * real_c(1 / 2.0);
+                     }
+                     if (boundaryHandling->isDomain(cell3)) {
+                        stress2 += (stressOld->get(cell) * pdf->get(cell, d.toIdx())) * real_c(1 / 1.5);
+                     }
+                  }
+                  else {
+                     stress1 += stressOld->get(cell1) * pdf->get(cell, d.toIdx());
+                     stress2 += stressOld->get(cell) * pdf->get(cell, d.toIdx());
+                     stress3 += stressOld->get(cell2) * pdf->get(cell, d.toIdx());
+                  }
+               }
+               else {
+                  if (nearBoundaryFlag) {
+                     if (boundaryHandling->isDomain(cell1)) {
+                        stress1 += (stressOld->get(cell1) * pdf->get(cell1, d.toIdx())) * real_c(1 / 2.0);
+                     }
+                     if (boundaryHandling->isDomain(cell3)) {
+                        stress2 += (stressOld->get(cell) * pdf->get(cell, d.toIdx())) * real_c(1 / 1.5);
+                     }
+                  }
+                  else {
+                     stress1 += stressOld->get(cell1) * pdf->get(cell1, d.toIdx());
+                     stress2 += stressOld->get(cell) * pdf->get(cell, d.toIdx());
+                     stress3 += stressOld->get(cell2) * pdf->get(cell2, d.toIdx());
+                  }
+               }
+            }
+            Matrix3<real_t> gradu = Matrix3<real_t>(0.0);
+
+            // compute velocity gradient
+            for (auto d = LatticeModel_T::Stencil::beginNoCenter(); d.direction() != stencil::NW; ++d) {
+               for (uint_t a = 0; a < LatticeModel_T::Stencil::D; ++a) {
+                  for (uint_t b = 0; b < LatticeModel_T::Stencil::D; ++b) {
+                     if(boundaryHandling->isDomain(cell + *d) ) {
+                        gradu(b, a) += velocity->get(cell + *d)[a] * real_c(d.c(b)) * real_c(0.5);
+                     } else if(boundaryHandling->isDomain(cell - *d)){
+                        gradu(b, a) += velocity->get(cell - *d)[a] * real_c(d.c(b)) * real_c(-0.5) + velocity->get(cell)[a] * real_c(d.c(b));
+                     }
+                  }
+               }
+            }
+            auto graduT = gradu.getTranspose();
+
+            // equation 16 from Su 2013
+            relstr = stressOld->get(cell) * gradu + graduT * stressOld->get(cell);
+
+            if(eta_p_ > real_t(0)) {
+               relstr += ((gradu + graduT) * eta_p_ - stressOld->get(cell)) * inv_lambda_p_;
+            }
+
+            // equation 23 from Su 2013
+            stressNew->get(cell) = stressOld->get(cell) + (stress1 * real_c(2.0) - stress2 * real_c(1.5) - stress3 * real_c(0.5)) * delta_t_ * (real_c(1.0) / pdf->getDensity(cell)) +
+                                   relstr * delta_t_;
+         }
+      })
+   }
+
+   void swapStressBuffers( IBlock * block ){
+      StressField_T * stressNew = block->getData< StressField_T >(stressId_);
+      StressField_T * stressOld = block->getData< StressField_T >(stressOldId_);
+
+      // swap pointers to old and new stress fields
+      stressOld->swapDataPointers(stressNew);
+   }
+
+   void cacheVelocity( IBlock * block ){
+      VelocityField_T * velocity = block->getData< VelocityField_T >(velocityId_);
+      PdfField_T * pdf = block->getData< PdfField_T >(pdfId_);
+      BoundaryHandling_T * boundaryHandling = block->getData< BoundaryHandling_T >( boundaryHandlingId_ );
+
+      WALBERLA_ASSERT_GREATER_EQUAL( velocity->nrOfGhostLayers(), 1 );
+
+      // update velocity field for all fluid cells
+      WALBERLA_FOR_ALL_CELLS_XYZ(velocity,{
+         Cell cell(x, y, z);
+         if( boundaryHandling->isDomain(cell) ) {
+            velocity->get( cell ) = pdf->getVelocity(x, y, z);
+         }
+      })
+   }
+
+   void calculateForces( IBlock * block ) {
+      using namespace stencil;
+      StressField_T * stress = block->getData< StressField_T >(stressId_);
+      ForceField_T * force = block->getData< ForceField_T >(forceId_);
+      BoundaryHandling_T * boundaryHandling = block->getData< BoundaryHandling_T >( boundaryHandlingId_ );
+
+      WALBERLA_ASSERT_GREATER_EQUAL( stress->nrOfGhostLayers(), 1);
+
+      WALBERLA_FOR_ALL_CELLS_XYZ(force, {
+         Cell cell(x, y, z);
+         uint_t k = 0;
+         Vector3<real_t> f = Vector3<real_t>(0.0);
+
+         // calculate force from finite difference divergence of extra stress in 3d or 2d
+         if (boundaryHandling->isDomain(cell)) {
+            for (auto d = LatticeModel_T::Stencil::beginNoCenter(); d.direction() != NW; ++d) {
+               for (uint_t i = 0; i < LatticeModel_T::Stencil::D; ++i) {
+                  if (d.direction() == E || d.direction() == W) {
+                     k = 0;
+                  } else if (d.direction() == N || d.direction() == S) {
+                     k = 1;
+                  } else {
+                     k = 2;
+                  }
+                  if (boundaryHandling->isDomain(cell + *d)) {
+                     f[i] += stress->get(cell + *d)(k, i) * real_c(d.c(k));
+                  } else if(boundaryHandling->isDomain(cell - *d)){
+                     f[i] += -stress->get(cell - *d)(k, i) * real_c(d.c(k)) + stress->get(cell)(k, i) * real_c(d.c(k)) * real_c(2.0);
+                  }
+               }
+            }
+            force->get(x, y, z) = f*real_c(0.5);
+         }
+      })
+   }
+
+   void operator()() {
+      for( auto it = blocks_->begin(); it != blocks_->end(); ++it ) {
+         auto block = it.get();
+         if (executionCount_ % uint_c(delta_t_) == 0) {
+            swapStressBuffers(block);
+            cacheVelocity(block);
+         }
+      }
+      communicateVelocities_();
+      for( auto it = blocks_->begin(); it != blocks_->end(); ++it ){
+         auto block = it.get();
+         if (executionCount_ % uint_c(delta_t_) == 0)
+         {
+            calculateStresses(block);
+         }
+      }
+      communicateStress_();
+      for( auto it = blocks_->begin(); it != blocks_->end(); ++it ){
+         auto block = it.get();
+         calculateForces(block);
+      }
+      executionCount_++;
+   }
+private:
+   Blocks_T blocks_;
+   BlockDataID forceId_, pdfId_, boundaryHandlingId_, stressId_, stressOldId_, velocityId_;
+   const real_t inv_lambda_p_, eta_p_, delta_t_;
+   uint_t executionCount_;
+   blockforest::communication::UniformBufferedScheme< typename LatticeModel_T::CommunicationStencil > communicateStress_, communicateVelocities_;
+   bool compressibleFlag_;
+};
+        
+} // namespace viscoelastic
+} // namespace lbm
+} // namespace walberla
diff --git a/tests/lbm/CMakeLists.txt b/tests/lbm/CMakeLists.txt
index 795636733fa71b846d3e954429169e6f1e0c28a1..5a40bcfe181bc8d5400e9f1814e9b44e019f67be 100644
--- a/tests/lbm/CMakeLists.txt
+++ b/tests/lbm/CMakeLists.txt
@@ -62,6 +62,8 @@ waLBerla_execute_test( NAME PermeabilityTest_TRT_64_8 COMMAND $<TARGET_FILE:Perm
 waLBerla_compile_test( FILES initializer/PdfFieldInitializerTest.cpp )
 waLBerla_execute_test( NAME PdfFieldInitializerTest COMMAND $<TARGET_FILE:PdfFieldInitializerTest> ${CMAKE_CURRENT_SOURCE_DIR}/PdfFieldInitializerTest.prm PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo )
 
+waLBerla_compile_test( FILES SuViscoelasticityTest.cpp DEPENDS field blockforest timeloop)
+waLBerla_execute_test( NAME  SuViscoelasticityTest COMMAND $<TARGET_FILE:SuViscoelasticityTest> ${CMAKE_CURRENT_SOURCE_DIR}/Su.prm )
 
 # Code Generation
 waLBerla_python_file_generates(codegen/SrtWithForceFieldModel.py
diff --git a/tests/lbm/Su.prm b/tests/lbm/Su.prm
new file mode 100644
index 0000000000000000000000000000000000000000..88a9bfe3baaa384708a746f6ac8a98dd8aa42727
--- /dev/null
+++ b/tests/lbm/Su.prm
@@ -0,0 +1,26 @@
+Parameters 
+{
+	eta_s           0.5;
+	timesteps       3000;
+	force           0.00001;
+	lambda_p        300.0;
+	eta_p           0.5;
+    period          1;
+    L               11;
+    H               33;
+    blockSize       11;
+    uMax            1.28737;
+    t0              370;
+    t1              326;
+}
+
+DomainSetup
+{
+   blocks        <  1,3,1 >;
+   cartesianSetup false;
+   cellsPerBlock <  11,11, 1 >;
+   periodic      <  1, 0, 0 >;
+}
+
+
+
diff --git a/tests/lbm/SuViscoelasticityTest.cpp b/tests/lbm/SuViscoelasticityTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3952d7cb48a6faf2cc81c6d716c07bc5db54587a
--- /dev/null
+++ b/tests/lbm/SuViscoelasticityTest.cpp
@@ -0,0 +1,320 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SuViscoelasticityTest.cpp
+//! \ingroup lbm
+//! \author Cameron Stewart <cstewart@icp.uni-stuttgart.de>
+//! \brief D2Q9 Poiseuille flow simulation with Oldroyd-B viscoelasticity - Checks against analytical formula and
+//!        reference data
+//
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "boundary/BoundaryHandling.h"
+
+#include "core/Environment.h"
+#include "core/SharedFunctor.h"
+
+#include "domain_decomposition/SharedSweep.h"
+
+#include "field/communication/PackInfo.h"
+
+#include "lbm/boundary/all.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/lattice_model/D2Q9.h"
+#include "lbm/lattice_model/ForceModel.h"
+#include "lbm/SuViscoelasticity.h"
+#include "lbm/sweeps/CellwiseSweep.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+namespace walberla {
+
+//////////////
+// TYPEDEFS //
+//////////////
+
+typedef GhostLayerField< Vector3<real_t>, 1> ForceField_T;
+
+typedef lbm::collision_model::TRT CollisionModel_T;
+typedef lbm::force_model::GuoField< ForceField_T > ForceModel_T;
+typedef lbm::D2Q9< CollisionModel_T, false, ForceModel_T >  LatticeModel_T;
+typedef LatticeModel_T::Stencil    Stencil_T;
+
+typedef GhostLayerField<Matrix3<real_t>, 1> StressField_T;
+typedef GhostLayerField< Vector3<real_t>, 1> VelocityField_T;
+typedef lbm::PdfField< LatticeModel_T >  PdfField_T;
+typedef walberla::uint8_t flag_t;
+typedef FlagField< flag_t > FlagField_T;
+const uint_t FieldGhostLayers = 2;
+
+typedef lbm::NoSlip< LatticeModel_T, flag_t> NoSlip_T;
+typedef boost::tuples::tuple< NoSlip_T > BoundaryConditions_T;
+typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T> BoundaryHandling_T;
+
+///////////
+// FLAGS //
+///////////
+
+const FlagUID Fluid_Flag( "fluid" );
+const FlagUID NoSlip_Flag( "no slip" );
+
+/////////////////////////////////////
+// BOUNDARY HANDLING CUSTOMIZATION //
+/////////////////////////////////////
+
+class MyBoundaryHandling
+{
+public:
+
+   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID ) : flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ) {}
+
+   BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const;
+
+private:
+
+   const BlockDataID flagFieldID_;
+   const BlockDataID pdfFieldID_;
+
+}; // class MyBoundaryHandling
+
+
+BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const StructuredBlockStorage * const storage ) const
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( block );
+   WALBERLA_ASSERT_NOT_NULLPTR( storage );
+
+   FlagField_T * flagField       = block->getData< FlagField_T >( flagFieldID_ );
+   PdfField_T *  pdfField        = block->getData< PdfField_T > ( pdfFieldID_ );
+
+   const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
+
+   BoundaryHandling_T * handling = new BoundaryHandling_T( "moving obstacle boundary handling", flagField, fluid,
+                                                           boost::tuples::make_tuple(    NoSlip_T( "NoSlip", NoSlip_Flag, pdfField ) ) );
+
+   const auto noSlip = flagField->getFlag(NoSlip_Flag);
+
+   CellInterval domainBB = storage->getDomainCellBB();
+   domainBB.xMin() -= cell_idx_c( 1 );
+   domainBB.xMax() += cell_idx_c( 1 );
+
+   domainBB.yMin() -= cell_idx_c( 1 );
+   domainBB.yMax() += cell_idx_c( 1 );
+
+   // SOUTH
+   CellInterval south( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMin(), domainBB.zMax() );
+   storage->transformGlobalToBlockLocalCellInterval( south, *block );
+   handling->forceBoundary( noSlip, south );
+
+   // NORTH
+   CellInterval north( domainBB.xMin(), domainBB.yMax(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
+   storage->transformGlobalToBlockLocalCellInterval( north, *block );
+   handling->forceBoundary( noSlip, north );
+
+   handling->fillWithDomain( FieldGhostLayers );
+
+   return handling;
+}
+
+//////////////////////
+// Poiseuille Force //
+//////////////////////
+
+template < typename BoundaryHandling_T >
+class ConstantForce
+{
+public:
+
+   ConstantForce( BlockDataID forceFieldId, BlockDataID boundaryHandlingId, real_t force)
+         : forceFieldId_( forceFieldId ), boundaryHandlingId_(boundaryHandlingId), force_(force)
+   {}
+
+   void operator()( IBlock * block )
+   {
+      ForceField_T *forceField = block->getData< ForceField_T >(forceFieldId_);
+      BoundaryHandling_T *boundaryHandling = block->getData< BoundaryHandling_T >( boundaryHandlingId_ );
+
+      WALBERLA_FOR_ALL_CELLS_XYZ(forceField,
+                                 {
+                                    Cell cell(x,y,z);
+                                    if (boundaryHandling->isDomain(cell)) {
+                                       forceField->get(cell)[0] += force_;
+                                    }
+                                 })
+   }
+
+private:
+
+   BlockDataID forceFieldId_, boundaryHandlingId_;
+   real_t force_;
+};
+
+////////////////////
+// Take Test Data //
+////////////////////
+
+class TestData
+{
+public:
+   TestData(Timeloop & timeloop, shared_ptr< StructuredBlockForest > blocks, BlockDataID pdfFieldId, BlockDataID stressFieldId, uint_t timesteps, uint_t blockSize, real_t L, real_t H, real_t uExpected)
+            : timeloop_(timeloop), blocks_(blocks), pdfFieldId_(pdfFieldId), stressFieldId_(stressFieldId), timesteps_(timesteps), blockSize_(blockSize), t0_(0), t1_(0), L_(L), H_(H), uMax_(0.0), uPrev_(0.0),
+            uExpected_(uExpected), uSteady_(0.0) {}
+
+   void operator()() {
+      for (auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt) {
+         PdfField_T *pdf = blockIt.get()->getData<PdfField_T>(pdfFieldId_);
+
+         if (blockIt.get()->getAABB().contains(float_c(L_/2.0), float_c(H_/2.0), 0)) {
+            uCurr_ = pdf->getVelocity(int32_c(blockSize_/2), int32_c(blockSize_/2), 0)[0]/uExpected_;
+            tCurr_ = timeloop_.getCurrentTimeStep();
+            if (tCurr_ == timesteps_ - 1){
+               uSteady_ = uCurr_;
+            }
+            if (maxFlag_ == 0) {
+               if (uCurr_ >= uPrev_) {
+                  uMax_ = uCurr_;
+               } else {
+                  t0_ = tCurr_;
+                  maxFlag_ = 1;
+               }
+               uPrev_ = uCurr_;
+            }
+            else if (maxFlag_ == 1) {
+               if ((uCurr_ - 1.0) <= (uMax_ - 1.0) / std::exp(1)) {
+                  t1_ = tCurr_ - t0_;
+                  maxFlag_ = 2;
+               }
+            }
+         }
+      }
+   }
+
+   real_t getUSteady(){
+      return uSteady_;
+   }
+
+   real_t getUMax(){
+      return uMax_;
+   }
+
+   real_t getT0(){
+      return real_c(t0_);
+   }
+
+   real_t getT1(){
+      return real_c(t1_);
+   }
+
+private:
+   Timeloop & timeloop_;
+   shared_ptr< StructuredBlockForest > blocks_;
+   BlockDataID pdfFieldId_, stressFieldId_;
+   uint_t timesteps_, blockSize_, t0_, t1_, tCurr_;
+   uint_t maxFlag_ = 0;
+   real_t L_, H_, uMax_, uPrev_, uCurr_, uExpected_, uSteady_;
+};
+
+} // namespace walberla
+
+//////////
+// Main //
+//////////
+
+int main(int argc, char ** argv ){
+   using namespace walberla;
+
+   Environment env( argc, argv );
+
+   // read parameter
+   shared_ptr<StructuredBlockForest> blocks = blockforest::createUniformBlockGridFromConfig( env.config() );
+   auto parameters = env.config()->getOneBlock( "Parameters" );
+
+   // extract some constants from the parameters
+   const real_t          eta_s           = parameters.getParameter< real_t > ("eta_s");
+   const real_t          force           = parameters.getParameter< real_t > ("force");
+   const real_t          eta_p           = parameters.getParameter< real_t > ("eta_p");
+   const real_t          lambda_p        = parameters.getParameter< real_t > ("lambda_p");
+   const uint_t          period          = parameters.getParameter< uint_t > ("period");
+   const real_t          L               = parameters.getParameter< real_t > ("L");
+   const real_t          H               = parameters.getParameter< real_t > ("H");
+   const uint_t          blockSize       = parameters.getParameter< uint_t > ("blockSize");
+   const uint_t          timesteps       = parameters.getParameter< uint_t > ("timesteps");
+
+   // reference data
+   const real_t          uExpected       = force*H*H/(real_c(8.0)*(eta_s + eta_p));
+   const real_t          uMax            = parameters.getParameter< real_t > ("uMax");
+   const real_t          t0              = parameters.getParameter< real_t > ("t0");
+   const real_t          t1              = parameters.getParameter< real_t > ("t1");
+
+   // create fields
+   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", FieldGhostLayers);
+   BlockDataID forceFieldId = field::addToStorage<ForceField_T>( blocks, "Force Field", Vector3<real_t>(0.0), field::zyxf, FieldGhostLayers);
+   LatticeModel_T latticeModel = LatticeModel_T(lbm::collision_model::TRT::constructWithMagicNumber( walberla::lbm::collision_model::omegaFromViscosity(eta_s)), lbm::force_model::GuoField<ForceField_T>( forceFieldId ) );
+   BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel, Vector3<real_t>(), real_c(1.0), FieldGhostLayers );
+   BlockDataID stressId = walberla::field::addToStorage<StressField_T>( blocks, "Stress Field", Matrix3<real_t>(0.0), field::zyxf, FieldGhostLayers);
+   BlockDataID stressOldId = walberla::field::addToStorage<StressField_T>( blocks, "Old Stress Field", Matrix3<real_t>(0.0), field::zyxf, FieldGhostLayers);
+   BlockDataID velocityId = walberla::field::addToStorage<VelocityField_T> (blocks, "Velocity Field", Vector3<real_t>(0.0), field::zyxf, FieldGhostLayers);
+
+   // add boundary handling
+   BlockDataID boundaryHandlingId = blocks->addStructuredBlockData< BoundaryHandling_T >( MyBoundaryHandling( flagFieldId, pdfFieldId ), "boundary handling" );
+
+   // create time loop
+   SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
+
+   // create communication for PdfField
+   blockforest::communication::UniformBufferedScheme< Stencil_T > communication( blocks );
+   communication.addPackInfo( make_shared< field::communication::PackInfo< PdfField_T > >( pdfFieldId ) );
+   auto testData = make_shared< TestData >(TestData(timeloop, blocks, pdfFieldId, stressId, timesteps, blockSize, L, H, uExpected));
+
+   // structure timeloop
+   timeloop.add() << BeforeFunction( communication, "communication" )
+                  << Sweep( BoundaryHandling_T::getBlockSweep( boundaryHandlingId ), "boundary handling" );
+   timeloop.add() << BeforeFunction( lbm::viscoelastic::Su<LatticeModel_T, BoundaryHandling_T>(blocks, forceFieldId, pdfFieldId, boundaryHandlingId, stressId, stressOldId, velocityId,
+                                                                                               lambda_p, eta_p, period, true), "viscoelasticity")
+                  << Sweep( ConstantForce<BoundaryHandling_T>(forceFieldId, boundaryHandlingId, force),"Poiseuille Force");
+   timeloop.add() << Sweep( makeSharedSweep( lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldId, flagFieldId, Fluid_Flag ) ),
+                            "LB stream & collide" )
+                  << AfterFunction(makeSharedFunctor(testData), "test data");
+
+   timeloop.run();
+
+
+   // compare to reference data
+   real_t errSteady = real_c(fabs(testData->getUSteady() - real_c(1.0))/real_c(1.0));
+   real_t errMax = real_c(fabs(testData->getUMax() - uMax)/uMax);
+   real_t errt0 = real_c(fabs(testData->getT0() - t0)/t0);
+   real_t errt1 = real_c(fabs(testData->getT1() - t1)/t1);
+
+   WALBERLA_LOG_RESULT("Steady State Velocity Error: " << errSteady );
+   WALBERLA_LOG_RESULT("Maximum Velocity Error: " << errMax );
+   WALBERLA_LOG_RESULT("Time of Maximum Error: " << errt0 );
+   WALBERLA_LOG_RESULT("Decay Time Error: " << errt1 );
+
+   // check that errors < 1%
+   if (errSteady < 0.01 && errMax < 0.01 && errt0 < 0.01 && errt1 < 0.01){
+      WALBERLA_LOG_RESULT("Success" );
+      return EXIT_SUCCESS;
+   }
+   else {
+      WALBERLA_LOG_RESULT("Failure" );
+      return EXIT_FAILURE;
+   }
+
+}
+
+