diff --git a/src/pe_coupling/mapping/BodyMapping.h b/src/pe_coupling/mapping/BodyMapping.h
index 0594c911fcdf54e30c3177a89deacf08984ba304..6df7a1341381f868b6c6d46e6134fa48f7fc1f81 100644
--- a/src/pe_coupling/mapping/BodyMapping.h
+++ b/src/pe_coupling/mapping/BodyMapping.h
@@ -114,6 +114,21 @@ void mapGlobalBodies( const shared_ptr<StructuredBlockStorage> & blockStorage, c
    }
 }
 
+template< typename BoundaryHandling_T >
+void mapGlobalBody( const id_t globalBodySystemID,
+                    const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, pe::BodyStorage & globalBodyStorage, const FlagUID & obstacle,
+                    const bool fixedBodiesOnly = true, const bool moBodiesOnly = true )
+{
+   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   {
+      auto bodyIt = globalBodyStorage.find( globalBodySystemID );
+      if( bodyIt != globalBodyStorage.end() )
+      {
+         mapBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, obstacle, fixedBodiesOnly, moBodiesOnly );
+      }
+   }
+}
+
 
 } // namespace pe_coupling
 } // namespace walberla
diff --git a/src/pe_coupling/momentum_exchange_method/BodyMapping.h b/src/pe_coupling/momentum_exchange_method/BodyMapping.h
index c1f3db97a07155654dc8ef6a3340987f327c0157..732b31fe67bc483c3a6e52ad6daf85560c3fc726 100644
--- a/src/pe_coupling/momentum_exchange_method/BodyMapping.h
+++ b/src/pe_coupling/momentum_exchange_method/BodyMapping.h
@@ -253,6 +253,20 @@ void mapMovingGlobalBodies( const shared_ptr<StructuredBlockStorage> & blockStor
    }
 }
 
+template< typename BoundaryHandling_T >
+void mapMovingGlobalBody( const id_t globalBodySystemID,
+                          const shared_ptr<StructuredBlockStorage> & blockStorage, const BlockDataID & boundaryHandlingID, pe::BodyStorage & globalBodyStorage,
+                          const BlockDataID & bodyFieldID, const FlagUID & obstacle )
+{
+   for( auto blockIt = blockStorage->begin(); blockIt != blockStorage->end(); ++blockIt )
+   {
+      auto bodyIt = globalBodyStorage.find( globalBodySystemID );
+      if( bodyIt !=  globalBodyStorage.end() )
+      {
+         mapMovingBody< BoundaryHandling_T >( *bodyIt, *blockIt, blockStorage, boundaryHandlingID, bodyFieldID, obstacle );
+      }
+   }
+}
 
 
 } // namespace pe_coupling
diff --git a/tests/pe_coupling/CMakeLists.txt b/tests/pe_coupling/CMakeLists.txt
index d2ee5d2ad07204515ac61f375dde9608a9224662..b956c82da8a313411a476bb030f0de629de0955a 100644
--- a/tests/pe_coupling/CMakeLists.txt
+++ b/tests/pe_coupling/CMakeLists.txt
@@ -99,8 +99,9 @@ waLBerla_execute_test( NAME LubricationCorrectionMEMSphereWallTest   COMMAND $<T
 waLBerla_compile_test( FILES momentum_exchange_method/PeriodicParticleChannelMEM.cpp DEPENDS blockforest pe timeloop )
 waLBerla_execute_test( NAME PeriodicParticleChannelMEMTest COMMAND $<TARGET_FILE:PeriodicParticleChannelMEM> --shortrun PROCESSES 4 LABELS longrun )
 
-waLBerla_compile_test( FILES momentum_exchange_method/CouetteFlowCylinder.cpp DEPENDS blockforest pe timeloop )
-waLBerla_execute_test( NAME CouetteFlowCylinder COMMAND $<TARGET_FILE:CouetteFlowCylinder> --shortrun PROCESSES 4 LABELS longrun )
+waLBerla_compile_test( FILES momentum_exchange_method/TaylorCouetteFlowMEM.cpp DEPENDS blockforest pe timeloop )
+waLBerla_execute_test( NAME TaylorCouetteFlowMEMTest COMMAND $<TARGET_FILE:TaylorCouetteFlowMEM> --shortrun )
+waLBerla_execute_test( NAME TaylorCouetteFlowMEMTest2 COMMAND $<TARGET_FILE:TaylorCouetteFlowMEM>  PROCESSES 4 LABELS longrun)
 
 waLBerla_compile_test( FILES momentum_exchange_method/SegreSilberbergMEM.cpp DEPENDS blockforest pe timeloop )
 waLBerla_execute_test( NAME SegreSilberbergMEMSyncNextNeighbor    COMMAND $<TARGET_FILE:SegreSilberbergMEM> --shortrun                    PROCESSES 9 )
diff --git a/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp b/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp
index 51ae2623cf6413f54c9efa451cebcc94b7f64e18..46e97a1129656e65c26e5383cf9c471fa71f3b59 100644
--- a/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/PeriodicParticleChannelMEM.cpp
@@ -13,7 +13,7 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file PeriodicParticleChannelMEMPe.cpp
+//! \file PeriodicParticleChannelMEM.cpp
 //! \ingroup pe_coupling
 //! \author Florian Schornbaum <florian.schornbaum@fau.de>
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
@@ -65,8 +65,9 @@
 
 #include "vtk/all.h"
 
+#include <vector>
 
-namespace periodic_particle_channel_mem_pe
+namespace periodic_particle_channel_mem
 {
 
 ///////////
@@ -423,7 +424,7 @@ int main( int argc, char **argv )
    // create pe bodies
    const auto material = pe::createMaterial( "granular", real_c(1.2), real_c(0.25), real_c(0.4), real_c(0.4), real_c(0.35), real_c(1.39e11), real_c(5.18e7), real_c(1.07e2), real_c(1.07e2) );
 
-   // global (fixed) bodies
+   // global bodies
    // bounding planes
    pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), material );
    pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,real_c(width)), material );
@@ -431,14 +432,18 @@ int main( int argc, char **argv )
    pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-1,0), Vector3<real_t>(0,real_c(width),0), material );
 
    // spheres as obstacles
-   pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>(real_c(length) / real_t(2), real_t(50), real_t(110)), real_t(60), material, false, true, true );
-   pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>(                 real_t(0), real_t(50), -real_t(60)), real_t(80), material, false, true, true );
-   pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>(            real_c(length), real_t(50), -real_t(60)), real_t(80), material, false, true, true );
+   std::vector<walberla::id_t> globalBodiesToBeMapped;
+   auto globalSphere1 = pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>(real_c(length) / real_t(2), real_t(50), real_t(110)), real_t(60), material, true, false, true );
+   globalBodiesToBeMapped.push_back(globalSphere1->getSystemID() );
+   auto globalSphere2 = pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>(                 real_t(0), real_t(50), -real_t(60)), real_t(80), material, true, false, true );
+   globalBodiesToBeMapped.push_back(globalSphere2->getSystemID() );
+   auto globalSphere3 = pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>(            real_c(length), real_t(50), -real_t(60)), real_t(80), material, true, false, true );
+   globalBodiesToBeMapped.push_back(globalSphere3->getSystemID() );
 
    // local bodies: moving spheres
    const real_t radius = real_t(10);
 
-   auto sphere = pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( real_t(15), real_t(50), real_t(35) ), radius, material ); //TODO change
+   auto sphere = pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( real_t(15), real_t(50), real_t(35) ), radius, material );
    if( sphere != NULL ) sphere->setLinearVel( velocity, real_t(0), real_t(0) );
 
    sphere = pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( real_t(15), real_t(35), real_t(50) ), radius, material );
@@ -494,7 +499,11 @@ int main( int argc, char **argv )
 
    // map pe bodies into the LBM simulation
    // global bodies act as no-slip obstacles and are not treated by the fluid-solid coupling
-   pe_coupling::mapGlobalBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, *globalBodyStorage, NoSlip_Flag );
+   // special care has to be taken here that only the global spheres (not the planes) are mapped since the planes would overwrite the already set boundary flags
+   for( auto globalBodyIt = globalBodiesToBeMapped.begin(); globalBodyIt != globalBodiesToBeMapped.end(); ++globalBodyIt )
+   {
+      pe_coupling::mapGlobalBody< BoundaryHandling_T >( *globalBodyIt, blocks, boundaryHandlingID, *globalBodyStorage, NoSlip_Flag, false );
+   }
    // moving bodies are handled by the momentum exchange method
    pe_coupling::mapMovingBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, bodyFieldID, MO_Flag );
 
@@ -608,8 +617,8 @@ int main( int argc, char **argv )
    return 0;
 }
 
-} //namespace periodic_particle_channel_mem_pe
+} //namespace periodic_particle_channel_mem
 
 int main( int argc, char **argv ){
-   periodic_particle_channel_mem_pe::main(argc, argv);
+   periodic_particle_channel_mem::main(argc, argv);
 }
diff --git a/tests/pe_coupling/momentum_exchange_method/CouetteFlowCylinder.cpp b/tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp
similarity index 50%
rename from tests/pe_coupling/momentum_exchange_method/CouetteFlowCylinder.cpp
rename to tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp
index 748f155815566f07e41b11c4d9fc2f9bdeb74b16..b7d4fc43c277766caaacf7f03a129577dbc79bd1 100644
--- a/tests/pe_coupling/momentum_exchange_method/CouetteFlowCylinder.cpp
+++ b/tests/pe_coupling/momentum_exchange_method/TaylorCouetteFlowMEM.cpp
@@ -13,10 +13,10 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file PeriodicParticleChannelMEMPe.cpp
+//! \file TaylorCouetteFlowMEM.cpp
 //! \ingroup pe_coupling
-//! \author Florian Schornbaum <florian.schornbaum@fau.de>
 //! \author Christoph Rettinger <christoph.rettinger@fau.de>
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
 //
 //======================================================================================================================
 
@@ -36,7 +36,6 @@
 #include "domain_decomposition/SharedSweep.h"
 
 #include "field/AddToStorage.h"
-#include "field/StabilityChecker.h"
 #include "field/communication/PackInfo.h"
 #include "field/vtk/all.h"
 
@@ -46,27 +45,19 @@
 #include "lbm/field/PdfField.h"
 #include "lbm/lattice_model/D3Q19.h"
 #include "lbm/sweeps/CellwiseSweep.h"
-#include "lbm/sweeps/SplitPureSweep.h"
 #include "lbm/vtk/all.h"
 
-#include "stencil/D3Q27.h"
-
 #include "timeloop/SweepTimeloop.h"
 
 #include "pe/basic.h"
-#include "pe/cr/ICR.h"
 #include "pe/Types.h"
-#include "pe/vtk/BodyVtkOutput.h"
-#include "pe/vtk/SphereVtkOutput.h"
 
 #include "pe_coupling/mapping/all.h"
 #include "pe_coupling/momentum_exchange_method/all.h"
-#include "pe_coupling/utility/all.h"
 
 #include "vtk/all.h"
 
-
-namespace periodic_particle_channel_mem_pe
+namespace taylor_coette_flow_mem
 {
 
 ///////////
@@ -93,11 +84,9 @@ typedef GhostLayerField< pe::BodyID, 1 >  BodyField_T;
 
 const uint_t FieldGhostLayers = 1;
 
-typedef lbm::NoSlip< LatticeModel_T, flag_t >                 NoSlip_T;
-typedef lbm::SimpleUBB< LatticeModel_T, flag_t >              UBB_T;
-typedef pe_coupling::SimpleBB< LatticeModel_T, FlagField_T > MO_T;
+typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MO_T;
 
-typedef boost::tuples::tuple< NoSlip_T, UBB_T, MO_T > BoundaryConditions_T;
+typedef boost::tuples::tuple< MO_T > BoundaryConditions_T;
 typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
 
 typedef boost::tuple< pe::Capsule, pe::CylindricalBoundary > BodyTypeTuple;
@@ -107,8 +96,6 @@ typedef boost::tuple< pe::Capsule, pe::CylindricalBoundary > BodyTypeTuple;
 ///////////
 
 const FlagUID    Fluid_Flag( "fluid" );
-const FlagUID      UBB_Flag( "velocity bounce back" );
-const FlagUID   NoSlip_Flag( "no slip" );
 const FlagUID       MO_Flag( "moving obstacle" );
 const FlagUID FormerMO_Flag( "former moving obstacle" );
 
@@ -118,130 +105,63 @@ const FlagUID FormerMO_Flag( "former moving obstacle" );
 // CHECK FUNCTIONS //
 /////////////////////
 
-template< typename LatticeModel_T, typename FlagField_T >
-class VelocityCheck
+class VelocityChecker
 {
 public:
-
-   typedef lbm::PdfField< LatticeModel_T > PdfField;
-
-   VelocityCheck( const ConstBlockDataID & pdfFieldID, const ConstBlockDataID & flagFieldID, const Set< FlagUID > & cellsToCheck,
-                  const real_t uMax, const uint_t checkFrequency ) :
-      executionCounter_( uint_c(0) ), checkFrequency_( ( checkFrequency > 0 ) ? checkFrequency : uint_c(1) ),
-      cellsToCheck_( cellsToCheck ), flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), uMax_( uMax * uMax ) {}
-
-   void operator()( const IBlock * const block )
+   VelocityChecker( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & pdfFieldID,
+                    real_t radius1, real_t radius2, real_t angularVel1, real_t angularVel2, real_t domainLength, real_t domainWidth ) :
+   blocks_( blocks ), pdfFieldID_( pdfFieldID ), radius1_( radius1 ), radius2_( radius2 ), angularVel1_( angularVel1 ), angularVel2_( angularVel2 ),
+   domainLength_( domainLength ), domainWidth_( domainWidth )
+   { }
+   real_t getMaximumRelativeError()
    {
-      ++executionCounter_;
-      if( ( executionCounter_ - uint_c(1) ) % checkFrequency_ != 0 )
-         return;
+      Vector3<real_t> midPoint( domainLength_ * real_t(0.5), domainWidth_ * real_t(0.5), domainWidth_ * real_t(0.5) );
+      Cell midCell = blocks_->getCell( midPoint );
+      CellInterval evaluationCells( midCell.x(), midCell.y(), midCell.z(), midCell.x(), blocks_->getDomainCellBB().yMax(), midCell.z() );
 
-      const FlagField_T * flagField = block->getData< FlagField_T >( flagFieldID_ );
-      const PdfField    *  pdfField = block->getData< PdfField    >(  pdfFieldID_ );
+      real_t maxError = real_t(0);
 
-      const CellInterval & cellBB = pdfField->xyzSize();
-      WALBERLA_ASSERT_EQUAL( flagField->xyzSize(), cellBB );
-
-      typename FlagField_T::flag_t mask = 0;
-      for( auto flag = cellsToCheck_.begin(); flag != cellsToCheck_.end(); ++flag )
-         mask = static_cast< typename FlagField_T::flag_t >( mask | flagField->getFlag( *flag ) );
-
-      for( cell_idx_t z = cellBB.zMin(); z <= cellBB.zMax(); ++z ) {
-         for( cell_idx_t y = cellBB.yMin(); y <= cellBB.yMax(); ++y ) {
-            for( cell_idx_t x = cellBB.xMin(); x <= cellBB.xMax(); ++x )
+      for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
+      {
+         auto pdfField = blockIt->getData< PdfField_T > ( pdfFieldID_ );
+         CellInterval intersection = blocks_->getBlockCellBB( *blockIt );
+         intersection.intersect(  evaluationCells );
+         blocks_->transformGlobalToBlockLocalCellInterval( intersection, *blockIt );
+         for( auto fieldIt = pdfField->beginSliceXYZ(intersection); fieldIt != pdfField->end(); ++fieldIt )
+         {
+            const Vector3< real_t > cellCenter = blocks_->getBlockLocalCellCenter( *blockIt, fieldIt.cell() );
+            auto diff = cellCenter - midPoint;
+            real_t radius = diff.length();
+            if( radius > radius1_ && radius < radius2_ )
             {
-               if( flagField->isPartOfMaskSet(x,y,z,mask) )
-               {
-                   WALBERLA_CHECK_LESS_EQUAL( pdfField->getVelocity( x, y, z ).sqrLength(), uMax_ );
-               }
+               real_t velSimulation = pdfField->getVelocity( fieldIt.cell() )[2];
+               real_t velAnalytical = getAnalyticalVelocity( radius );
+               real_t relVelDiff = std::fabs(velSimulation - velAnalytical) / std::fabs(velAnalytical);
+               maxError = std::max( maxError, relVelDiff );
             }
          }
       }
-   }
-
-private:
-
-         uint_t executionCounter_;
-   const uint_t checkFrequency_;
-
-   const Set< FlagUID > cellsToCheck_;
-
-   const ConstBlockDataID flagFieldID_;
-   const ConstBlockDataID  pdfFieldID_;
-
-   const real_t uMax_;
-
-}; // VelocityCheck
-
-
-
-class ObstacleLocationCheck
-{
-public:
-
-   ObstacleLocationCheck( const shared_ptr< StructuredBlockStorage > & blocks,
-                          const BlockDataID & bodyStorageID,
-                          const AABB & aabb, const real_t additionalSpace )
-   :  blocks_( blocks ), bodyStorageID_( bodyStorageID ),
-      aabb_( aabb.xMin() - additionalSpace, aabb.yMin() - additionalSpace, aabb.zMin() - additionalSpace,
-             aabb.xMax() + additionalSpace, aabb.yMax() + additionalSpace, aabb.zMax() + additionalSpace ) {}
-
-   void operator()()
-   {
-      for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
+      WALBERLA_MPI_SECTION()
       {
-         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-         {
-            if( bodyIt->isFixed() || !bodyIt->isFinite() )
-                continue;
-
-            WALBERLA_CHECK( aabb_.contains( bodyIt->getPosition()[0], bodyIt->getPosition()[1], bodyIt->getPosition()[2] ) );
-         }
+         mpi::allReduceInplace( maxError, mpi::MAX );
       }
+      return maxError;
    }
-
 private:
-   shared_ptr< StructuredBlockStorage > blocks_;
-   const BlockDataID bodyStorageID_;
-   const AABB aabb_;
-
-}; // ObstacleLocationCheck
-
-
-
-class ObstacleLinearVelocityCheck
-{
-public:
-
-   ObstacleLinearVelocityCheck( const shared_ptr< StructuredBlockStorage > & blocks,
-                                const BlockDataID & bodyStorageID, const real_t uMax )
-   : blocks_( blocks ), bodyStorageID_( bodyStorageID ), uMax_( uMax * uMax )
-   {}
-
-   void operator()()
+   real_t getAnalyticalVelocity( real_t r )
    {
-      for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt )
-      {
-         for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::BodyIterator::end(); ++bodyIt )
-         {
-            if( bodyIt->isFixed() || !bodyIt->isFinite() )
-               continue;
-
-            const auto & u = bodyIt->getLinearVel();
-            WALBERLA_CHECK_LESS_EQUAL( (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]), uMax_ );
-
-         }
-      }
+      real_t radius1sqr = radius1_ * radius1_;
+      real_t radius2sqr = radius2_ * radius2_;
+      real_t a = ( angularVel2_ * radius2sqr - angularVel1_ * radius1sqr ) / ( radius2sqr - radius1sqr );
+      real_t b = ( angularVel1_ - angularVel2_ ) * radius1sqr * radius2sqr / ( radius2sqr - radius1sqr );
+      real_t velInAgularDir = a * r + b / r;
+      return velInAgularDir;
    }
 
-private:
    shared_ptr< StructuredBlockStorage > blocks_;
-   const BlockDataID bodyStorageID_;
-   const real_t uMax_;
-
-}; // ObstacleLinearVelocityCheck
-
-
+   const BlockDataID pdfFieldID_;
+   real_t radius1_, radius2_, angularVel1_, angularVel2_, domainLength_, domainWidth_;
+};
 
 /////////////////////////////////////
 // BOUNDARY HANDLING CUSTOMIZATION //
@@ -253,8 +173,8 @@ class MyBoundaryHandling
 {
 public:
 
-   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID, const real_t velocity ) :
-      flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_( bodyFieldID ), velocity_( velocity ) {}
+   MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID ) :
+      flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_( bodyFieldID ) {}
 
    BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const;
 
@@ -264,8 +184,6 @@ private:
    const BlockDataID  pdfFieldID_;
    const BlockDataID bodyFieldID_;
 
-   const real_t velocity_;
-
 }; // class MyBoundaryHandling
 
 
@@ -282,11 +200,7 @@ BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const
    const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag );
 
    BoundaryHandling_T * handling = new BoundaryHandling_T( "cf boundary handling", flagField, fluid,
-         boost::tuples::make_tuple( NoSlip_T( "NoSlip", NoSlip_Flag, pdfField ),
-                                       UBB_T( "UBB", UBB_Flag, pdfField, velocity_, real_c(0), real_c(0) ),
-                                        MO_T( "MO", MO_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ) );
-
-   const auto ubb = flagField->getFlag( UBB_Flag );
+         boost::tuples::make_tuple( MO_T( "MO", MO_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ) );
 
    CellInterval domainBB = storage->getDomainCellBB();
    domainBB.xMin() -= cell_idx_c( FieldGhostLayers );
@@ -295,29 +209,9 @@ BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const
    domainBB.yMin() -= cell_idx_c( FieldGhostLayers );
    domainBB.yMax() += cell_idx_c( FieldGhostLayers );
 
-   // SOUTH
-   CellInterval south( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMin(), domainBB.zMax() );
-   storage->transformGlobalToBlockLocalCellInterval( south, *block );
-   handling->forceBoundary( ubb, south );
-
-   // NORTH
-   CellInterval north( domainBB.xMin(), domainBB.yMax(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
-   storage->transformGlobalToBlockLocalCellInterval( north, *block );
-   handling->forceBoundary( ubb, north );
-
    domainBB.zMin() -= cell_idx_c( FieldGhostLayers );
    domainBB.zMax() += cell_idx_c( FieldGhostLayers );
 
-   // BOTTOM
-   CellInterval bottom( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMin() );
-   storage->transformGlobalToBlockLocalCellInterval( bottom, *block );
-   handling->forceBoundary( ubb, bottom );
-
-   // TOP
-   CellInterval top( domainBB.xMin(), domainBB.yMin(), domainBB.zMax(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
-   storage->transformGlobalToBlockLocalCellInterval( top, *block );
-   handling->forceBoundary( ubb, top );
-
    handling->fillWithDomain( FieldGhostLayers );
 
    return handling;
@@ -327,7 +221,10 @@ BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const
 //////////
 // MAIN //
 //////////
-
+/*
+ * This test features a so-called Taylor-Couette flow, i.e. a Couette flow between two rotating, coaxial cylinders
+ * For such a setup, the velocity in angular direction can be calculated analytically.
+ */
 int main( int argc, char **argv )
 {
    debug::enterTestMode();
@@ -341,46 +238,40 @@ int main( int argc, char **argv )
       return EXIT_FAILURE;
    }
 
-   bool shortrun       = false;
-   bool useFZYX        = false;
-   bool useSplitKernel = false;
-   bool fullPDFSync    = false;
-   bool vtkIO          = false;
+   bool shortrun = false;
+   uint_t vtkIOFreq = 0;
 
    for( int i = 1; i < argc; ++i )
    {
-      if( std::strcmp( argv[i], "--shortrun" ) == 0 ) { shortrun       = true; continue; }
-      if( std::strcmp( argv[i], "--fzyx" )     == 0 ) { useFZYX        = true; continue; }
-      if( std::strcmp( argv[i], "--split" )    == 0 ) { useSplitKernel = true; continue; }
-      if( std::strcmp( argv[i], "--full" )     == 0 ) { fullPDFSync    = true; continue; }
-      if( std::strcmp( argv[i], "--vtkIO" )    == 0 ) { vtkIO          = true; continue; }
+      if( std::strcmp( argv[i], "--shortrun" ) == 0 ) { shortrun = true; continue; }
+      if( std::strcmp( argv[i], "--vtkIOFreq" ) == 0 ) { vtkIOFreq = uint_c( std::atof( argv[++i] ) ); continue; }
       WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
    }
 
+   bool vtkIO =  (vtkIOFreq == 0 ) ? false : true;
+
    ///////////////////////////
    // SIMULATION PROPERTIES //
    ///////////////////////////
 
    const uint_t length = uint_c(20);
-   const uint_t width  = uint_c(100);
+   const uint_t width = uint_c(101);
+   if( width % 2 == 0 ) WALBERLA_ABORT("Width has to be odd, since otherwise the current implementation of the velocity evaluation does not work correctly!");
 
-   const real_t omega    = real_c(1.33);
-   const real_t velocity = real_c(0.001);
-   const real_t nu_SI    = real_c(0.00005); // kinematic viscosity - 50 times larger than water [ m^2 / s ]
-   //const real_t rho_SI   = real_c(1000);    // density of water [ kg / m^3 ]
-   const real_t L_SI     = real_c(0.04);    // length of channel [ m ]
+   const real_t omega = real_c(1.33);
 
-   const real_t nu_L  = ( real_t(1) / omega - real_c(0.5) ) / real_t(3);
-   const real_t dx_SI = L_SI / real_c( length );          // dx in [ m ]
-   const real_t dt_SI = ( nu_L * dx_SI * dx_SI ) / nu_SI; // dt in [ s ]
+   const uint_t timesteps = shortrun ? uint_c(10) : uint_c(5000);
 
-   const uint_t timesteps = shortrun ? uint_c(1000) : uint_c(150000); // -> ~10 seconds of real time
+   const real_t radius1 = real_c(width) * real_t(0.25); // radius of internal cylinder
+   const real_t radius2 = real_c(width) * real_t(0.45); // radius of external cylinder
+   const real_t angularVel1 = real_t(0.001); // angular velocity of internal cylinder
+   const real_t angularVel2 = real_t(-0.001); // angular velocity of external cylinder
 
    ///////////////////////////
    // BLOCK STRUCTURE SETUP //
    ///////////////////////////
 
-   // (200x100x100) - periodic in x-direction!
+   // (length x width x width) - periodic in x-direction!
    Vector3<uint_t> blockDist = getFactors3D( uint_c(processes), Vector3<real_t>( real_c(length), real_c(width), real_c(width) ) );
    if ( processes == 1 )
    {
@@ -408,26 +299,21 @@ int main( int argc, char **argv )
    // set up pe functionality
    shared_ptr<pe::BodyStorage>  globalBodyStorage = make_shared<pe::BodyStorage>();
    pe::SetBodyTypeIDs<BodyTypeTuple>::execute();
-
    auto bodyStorageID  = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage");
-   auto ccdID          = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD");
-   auto fcdID          = blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD");
-
-   // set up collision response, here DEM solver
-   pe::cr::DEM cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID, NULL);
 
    // set up synchronization procedure
-   const real_t overlap = real_c( 1.5 ) * dx;
+   const real_t overlap = real_t( 1.5 ) * dx;
    boost::function<void(void)> syncCall = boost::bind( pe::syncShadowOwners<BodyTypeTuple>, boost::ref(blocks->getBlockForest()), bodyStorageID, static_cast<WcTimingTree*>(NULL), overlap, false );
 
    // create pe bodies
-   const auto material = pe::createMaterial( "granular", real_c(1.2), real_c(0.25), real_c(0.4), real_c(0.4), real_c(0.35), real_c(1.39e11), real_c(5.18e7), real_c(1.07e2), real_c(1.07e2) );
+   const auto material = pe::createMaterial( "granular", real_t(1.2), real_t(0.25), real_t(0.4), real_t(0.4), real_t(0.35), real_t(1.39e11), real_t(5.18e7), real_t(1.07e2), real_t(1.07e2) );
 
-   const real_t radius = real_t(10);
-   auto cb  = pe::createCylindricalBoundary( *globalBodyStorage, 0, pe::Vec3(length*real_t(0.5),50,50), 40 );
-   auto cap = pe::createCapsule( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, pe::Vec3(length * 0.5,50,50), 25, length*2, material, true, false, true );
-   cb->setAngularVel( pe::Vec3(1,0,0) * real_t(-0.01) );
-   cap->setAngularVel( pe::Vec3(1,0,0) * real_t(0.01) );
+   // instead of a cylinder, we use the capsule and make sure it extends the computational domain in x-direction to effectively get a cylinder
+   auto cap = pe::createCapsule( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, pe::Vec3(real_c(length),real_c(width),real_c(width)) * real_t(0.5), radius1, real_c(length)*real_t(2), material, true, false, true );
+   cap->setAngularVel( pe::Vec3(1,0,0) * angularVel1 );
+
+   auto cb = pe::createCylindricalBoundary( *globalBodyStorage, 0, pe::Vec3(real_c(length),real_c(width),real_c(width))*real_t(0.5), radius2 );
+   cb->setAngularVel( pe::Vec3(1,0,0) * angularVel2 );
 
    //synchronize the pe set up on all processes
    syncCall();
@@ -439,12 +325,7 @@ int main( int argc, char **argv )
    // add pdf field
    LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega ) );
 
-   BlockDataID pdfFieldID = useFZYX ? lbm::addPdfFieldToStorage( blocks, "pdf field (fzyx)", latticeModel,
-                                                                 Vector3< real_t >( velocity, real_t(0), real_t(0) ), real_t(1),
-                                                                 uint_t(1), field::fzyx ) :
-                                      lbm::addPdfFieldToStorage( blocks, "pdf field (zyxf)", latticeModel,
-                                                                 Vector3< real_t >( velocity, real_t(0), real_t(0) ), real_t(1),
-                                                                 uint_t(1), field::zyxf );
+   BlockDataID pdfFieldID = lbm::addPdfFieldToStorage( blocks, "pdf field (zyxf)", latticeModel, Vector3< real_t >( real_t(0) ), real_t(1), uint_t(1), field::zyxf );
 
    // add flag field
    BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field" );
@@ -454,13 +335,10 @@ int main( int argc, char **argv )
 
    // add boundary handling & initialize outer domain boundaries (moving walls on the front, back, top, and bottom plane)
    BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >(
-              MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID, real_c(6) * velocity ), "boundary handling" );
+              MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID ), "boundary handling" );
 
    // map pe bodies into the LBM simulation
-   // global bodies act as no-slip obstacles and are not treated by the fluid-solid coupling
-   //pe_coupling::mapGlobalBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, *globalBodyStorage, NoSlip_Flag, false );
-
-   // moving bodies are handled by the momentum exchange method
+   // moving bodies are handled by the momentum exchange method, thus act here as velocity boundary condition
    pe_coupling::mapMovingGlobalBodies< BoundaryHandling_T >( blocks, boundaryHandlingID, *globalBodyStorage, bodyFieldID, MO_Flag );
 
    ///////////////
@@ -471,18 +349,9 @@ int main( int argc, char **argv )
 
    // setup of the LBM communication for synchronizing the pdf field between neighboring blocks
    boost::function< void () > commFunction;
-   if( fullPDFSync )
-   {
-      blockforest::communication::UniformBufferedScheme< stencil::D3Q27 > scheme( blocks );
-      scheme.addPackInfo( make_shared< field::communication::PackInfo< PdfField_T > >( pdfFieldID ) );
-      commFunction = scheme;
-   }
-   else
-   {
-      blockforest::communication::UniformBufferedScheme< Stencil_T > scheme( blocks );
-      scheme.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldID ) );
-      commFunction = scheme;
-   }
+   blockforest::communication::UniformBufferedScheme< Stencil_T > scheme( blocks );
+   scheme.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldID ) );
+   commFunction = scheme;
 
    // add LBM communication function and boundary handling sweep
    timeloop.add()
@@ -490,10 +359,7 @@ int main( int argc, char **argv )
       << Sweep( BoundaryHandling_T::getBlockSweep( boundaryHandlingID ), "Boundary Handling" );
 
    // LBM stream collide sweep
-   if( useSplitKernel )
-      timeloop.add() << Sweep( lbm::SplitPureSweep< LatticeModel_T >( pdfFieldID ), "LBM SPLIT PURE" );
-   else
-      timeloop.add() << Sweep( makeSharedSweep( lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag ) ), "LBM DEFAULT" );
+   timeloop.add() << Sweep( makeSharedSweep( lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag ) ), "LBM DEFAULT" );
 
    ////////////////
    // VTK OUTPUT //
@@ -503,11 +369,6 @@ int main( int argc, char **argv )
    {
       const uint_t writeFrequency = 10; //vtk::determineWriteFrequency( dt_SI, uint_c(30) );
 
-      // spheres
-      auto bodyVtkOutput   = make_shared<pe::DefaultBodyVTKOutput>( bodyStorageID, blocks->getBlockStorage() );
-      auto bodyVTK   = vtk::createVTKOutput_PointData( bodyVtkOutput, "bodies", writeFrequency );
-      timeloop.addFuncAfterTimeStep( vtk::writeFiles( bodyVTK ), "VTK (sphere data)" );
-
       // flag field (written only once in the first time step, ghost layers are also written)
       auto flagFieldVTK = vtk::createVTKOutput_BlockData( blocks, "flag_field", timesteps, FieldGhostLayers );
       flagFieldVTK->addCellDataWriter( make_shared< field::VTKWriter< FlagField_T > >( flagFieldID, "FlagField" ) );
@@ -539,13 +400,22 @@ int main( int argc, char **argv )
 
    WcTimingPool timeloopTiming;
    timeloop.run( timeloopTiming );
-   timeloopTiming.logResultOnRoot();
+
+   VelocityChecker velChecker(blocks, pdfFieldID, radius1, radius2, angularVel1, angularVel2, real_c(length), real_c(width) );
+   real_t maxError = velChecker.getMaximumRelativeError();
+   if( !shortrun )
+   {
+      timeloopTiming.logResultOnRoot();
+      WALBERLA_LOG_RESULT_ON_ROOT("Maximum relative error in velocity in angular direction: " << maxError );
+      // error has to be below 10%
+      WALBERLA_CHECK_LESS( maxError, real_t(0.1) );
+   }
 
    return 0;
 }
 
-} //namespace periodic_particle_channel_mem_pe
+} //namespace taylor_coette_flow_mem
 
 int main( int argc, char **argv ){
-   periodic_particle_channel_mem_pe::main(argc, argv);
+   taylor_coette_flow_mem::main(argc, argv);
 }