diff --git a/src/pde/CMakeLists.txt b/src/pde/CMakeLists.txt
index 386a98c37d55db966c79de92b58eda5331724fdb..0b013dda8b65a581f5f5b99e300f84b1f2a0924e 100644
--- a/src/pde/CMakeLists.txt
+++ b/src/pde/CMakeLists.txt
@@ -4,7 +4,8 @@
 #
 ###################################################################################################
 
-waLBerla_add_module( DEPENDS blockforest 
+waLBerla_add_module( DEPENDS blockforest
+                             boundary 
                              core
                              domain_decomposition
                              field
diff --git a/src/pde/boundary/Dirichlet.h b/src/pde/boundary/Dirichlet.h
new file mode 100644
index 0000000000000000000000000000000000000000..f5543fdbe95ca04f90bf559604984c1f268a1d3f
--- /dev/null
+++ b/src/pde/boundary/Dirichlet.h
@@ -0,0 +1,339 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Dirichlet.h
+//! \ingroup pde
+//! \author Dominik Bartuschat <dominik.bartuschat@fau.de>
+//! \author Michael Kuron <mkuron@icp.uni-stuttgart.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "field/Field.h"
+
+#include "boundary/Boundary.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/config/Config.h"
+#include "core/debug/Debug.h"
+#include "core/logging/Logging.h"
+
+#include "field/FlagUID.h"
+
+#include "stencil/Directions.h"
+
+#include <vector>
+#include <limits>
+
+
+namespace walberla {
+namespace pde {
+
+
+
+//**********************************************************************************************************************
+/*!
+*   \brief Dirichlet boundary handling for PDEs
+*
+*   This boundary condition imposes a Dirichlet condition with arbitrary values on a PDE.
+*   It does so by modifying the right-hand side and the stencil field.
+*   Anything that has internal copies of the stencil field (e.g. the multigrid V-cycle's coarsened stencils) is
+*   responsible for updating its copies when boundary conditions are changed.
+*
+*   \tparam Stencil_T The stencil used for the discrete operator
+*   \tparam flag_t The integer type used by the flag field
+*/
+//**********************************************************************************************************************
+template< typename Stencil_T, typename flag_t >
+class Dirichlet : public Boundary< flag_t >
+{
+
+   typedef GhostLayerField< real_t, 1 > Field_T;
+   typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
+
+public:
+
+   static const bool threadsafe = false;
+
+   class DirichletBC : public BoundaryConfiguration {
+   public:
+             DirichletBC( const real_t & _dirichletBC ) : dirichletBC_( _dirichletBC ) {}
+      inline DirichletBC( const Config::BlockHandle & config );
+
+      const real_t & dirichletBC() const { return dirichletBC_; }
+      real_t & dirichletBC() { return dirichletBC_; }
+
+   private:
+
+      real_t dirichletBC_;
+   };
+
+   static shared_ptr<DirichletBC> createConfiguration( const Config::BlockHandle & config ) { return make_shared<DirichletBC>( config ); }
+
+
+
+  //*******************************************************************************************************************
+  /*! Creates a Dirichlet boundary
+   * \param boundaryUID the UID of the boundary condition
+   * \param uid the UID of the flag that marks cells with this boundary condition
+   * \param rhsField pointer to the right-hand side field, which will be adapted by this boundary condition
+   * \param stencilField pointer to the operator stencil field. It should contain the stencil weights that don't take
+   *                     into account the boundary conditions.
+   * \param adaptBCStencilField pointer to the operator stencil field that will be adapted by this boundary condition. 
+   *                            Initially, this field needs to contain the same values as \p stencilField.
+   *                            This is the stencil field that should be passed to the actual PDE solver.
+   * \param flagField pointer to the flag field
+   *******************************************************************************************************************/
+   inline Dirichlet( const BoundaryUID & boundaryUID, const FlagUID & uid, Field_T* const rhsField, const StencilField_T* const stencilField,
+                     StencilField_T* const adaptBCStencilField, FlagField<flag_t> * const flagField );
+
+   void pushFlags( std::vector< FlagUID > & uids ) const { uids.push_back( uid_ ); }
+
+   void beforeBoundaryTreatment() const {}
+   void afterBoundaryTreatment();
+
+   template< typename Buffer_T >
+   inline void packCell( Buffer_T & buffer, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) const;
+
+   template< typename Buffer_T >
+   inline void registerCell( Buffer_T & buffer, const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z );
+
+   inline void registerCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const BoundaryConfiguration & dirichletBC );
+   inline void registerCells( const flag_t, const CellInterval & cells, const BoundaryConfiguration & dirichletBC );
+   template< typename CellIterator >
+   inline void registerCells( const flag_t, const CellIterator & begin, const CellIterator & end, const BoundaryConfiguration & dirichletBC );
+
+   void unregisterCell( const flag_t, const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz );
+
+   inline void treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                               const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t mask );
+
+   inline const real_t & getValue( const cell_idx_t x, cell_idx_t y, cell_idx_t z ) const { return dirichletBC_->get(x,y,z); }
+
+private:
+
+   const FlagUID uid_;
+   const flag_t formerDirichlet_;
+   uint_t numDirtyCells_;
+
+   Field_T* const                rhsField_;
+   shared_ptr< Field_T >         dirichletBC_;
+   const StencilField_T * const  stencilField_;
+   StencilField_T * const        adaptBCStencilField_;
+   FlagField<flag_t> * const     flagField_;
+
+}; // class Dirichlet
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline Dirichlet< Stencil_T, flag_t >::DirichletBC::DirichletBC( const Config::BlockHandle & config  )
+{
+   dirichletBC_ = ( config && config.isDefined( "val" ) ) ? config.getParameter<real_t>( "val" ) : real_c(0.0);
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline Dirichlet< Stencil_T, flag_t >::Dirichlet( const BoundaryUID & boundaryUID, const FlagUID & uid, Field_T* const rhsField, const StencilField_T* const stencilField, StencilField_T* const adaptBCStencilField, FlagField<flag_t> * const flagField ) :
+   Boundary<flag_t>( boundaryUID ), uid_( uid ), formerDirichlet_ (flagField->getOrRegisterFlag("FormerDirichlet")), numDirtyCells_(std::numeric_limits<uint_t>::max()), rhsField_( rhsField ), stencilField_ ( stencilField ), adaptBCStencilField_ ( adaptBCStencilField ), flagField_ (flagField)
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( rhsField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( stencilField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( adaptBCStencilField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( flagField_ );
+
+   WALBERLA_ASSERT_EQUAL( rhsField_->xyzSize(), stencilField_->xyzSize() );
+#ifndef NDEBUG
+   WALBERLA_FOR_ALL_CELLS_XYZ( stencilField_,
+      for( auto dir = Stencil_T::begin(); dir != Stencil_T::end(); ++dir )
+      {
+         WALBERLA_ASSERT_IDENTICAL(stencilField_->get(x,y,z, dir.toIdx()), adaptBCStencilField_->get(x,y,z, dir.toIdx()));
+      }
+   )
+#endif
+
+   dirichletBC_ = make_shared< Field_T >( rhsField_->xSize(), rhsField_->ySize(), rhsField_->zSize(), uint_t(1), field::zyxf );
+
+}
+
+
+template< typename Stencil_T, typename flag_t >
+void Dirichlet< Stencil_T, flag_t >::afterBoundaryTreatment() {
+
+   if (numDirtyCells_>0 && numDirtyCells_ != std::numeric_limits<uint_t>::max()) {
+      WALBERLA_LOG_WARNING("De-registering cells requires re-running Galerkin coarsening");
+   }
+
+   numDirtyCells_=0;
+}
+
+template< typename Stencil_T, typename flag_t >
+template< typename Buffer_T >
+inline void Dirichlet< Stencil_T, flag_t >::packCell( Buffer_T & buffer, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) const
+{
+   buffer << dirichletBC_->get( x, y, z );
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+template< typename Buffer_T >
+inline void Dirichlet< Stencil_T, flag_t >::registerCell( Buffer_T & buffer, const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
+{
+   buffer >> dirichletBC_->get( x, y, z );
+   if( flagField_->isFlagSet( x, y, z, formerDirichlet_ ) )
+   {
+      flagField_->removeFlag( x, y, z, formerDirichlet_ );
+      --numDirtyCells_;
+   }
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline void Dirichlet< Stencil_T, flag_t >::registerCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z,
+                                                                                       const BoundaryConfiguration & dirichletBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const DirichletBC * >( &dirichletBC ), &dirichletBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( dirichletBC_ );
+
+   const DirichletBC & val = dynamic_cast< const DirichletBC & >( dirichletBC );
+
+   dirichletBC_->get( x, y, z ) = val.dirichletBC();
+
+   if( flagField_->isFlagSet( x, y, z, formerDirichlet_ ) )
+   {
+      flagField_->removeFlag( x, y, z, formerDirichlet_ );
+      --numDirtyCells_;
+   }
+
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline void Dirichlet< Stencil_T, flag_t >::registerCells( const flag_t, const CellInterval & cells, const BoundaryConfiguration & dirichletBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const DirichletBC * >( &dirichletBC ), &dirichletBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( dirichletBC_ );
+
+   const DirichletBC & val = dynamic_cast< const DirichletBC & >( dirichletBC );
+
+   for( auto cell = dirichletBC_->beginSliceXYZ( cells ); cell != dirichletBC_->end(); ++cell ) {
+      *cell = val.dirichletBC();
+
+      if( flagField_->isFlagSet( cell.cell(), formerDirichlet_ ) )
+      {
+         flagField_->removeFlag( cell.cell(), formerDirichlet_ );
+         --numDirtyCells_;
+      }
+   }
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+template< typename CellIterator >
+inline void Dirichlet< Stencil_T, flag_t >::registerCells( const flag_t, const CellIterator & begin, const CellIterator & end,
+                                                                                        const BoundaryConfiguration & dirichletBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const DirichletBC * >( &dirichletBC ), &dirichletBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( dirichletBC_ );
+
+   const DirichletBC & val = dynamic_cast< const DirichletBC & >( dirichletBC );
+
+   for( auto cell = begin; cell != end; ++cell )
+   {
+      dirichletBC_->get( cell->x(), cell->y(), cell->z() ) = val.dirichletBC();
+
+      if( flagField_->isFlagSet( cell->cell(), formerDirichlet_ ) )
+      {
+         flagField_->removeFlag( cell->cell(), formerDirichlet_ );
+         --numDirtyCells_;
+      }
+   }
+
+}
+
+// Remark: This unregister function works only properly for D3Q7 stencils and convex domains!
+template< typename Stencil_T, typename flag_t >
+void Dirichlet< Stencil_T, flag_t >::unregisterCell( const flag_t, const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz )
+{
+   flagField_->addFlag( nx,ny,nz, formerDirichlet_ );
+   ++numDirtyCells_;
+
+   Cell boundaryCell( nx, ny, nz );
+
+   // Set stencil previously adapted to Dirichlet BC back to un-adapted state
+   for( auto d = Stencil_T::begin(); d != Stencil_T::end(); ++d )
+   {
+      Cell domainCell = boundaryCell - *d;
+      if( adaptBCStencilField_->isInInnerPart( domainCell ) )
+      {
+//         WALBERLA_LOG_DEVEL("Undo Dirichlet treatment at: " << domainCell );
+
+         // restore original non-center stencil entry of neighboring non-boundary cell
+         adaptBCStencilField_->get( domainCell, d.toIdx() ) = stencilField_->get( domainCell, d.toIdx() );
+
+         // restore original center stencil entry of neighboring non-boundary cell
+         adaptBCStencilField_->get( domainCell, Stencil_T::idx[stencil::C] ) += adaptBCStencilField_->get( domainCell, d.toIdx() );
+      }
+   }
+}
+
+
+//*************************************************************************************************
+/*! \brief Treat one direction by adapting RHS according to Dirichlet boundary value.
+ *
+ */
+
+template< typename Stencil_T, typename flag_t >
+#ifndef NDEBUG
+inline void Dirichlet< Stencil_T, flag_t >::treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                                                                                         const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t mask )
+#else
+inline void Dirichlet< Stencil_T, flag_t >::treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                                                                                         const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t /*mask*/ )
+#endif
+{
+
+   WALBERLA_ASSERT_EQUAL( nx, x + cell_idx_c( stencil::cx[ dir ] ) );
+   WALBERLA_ASSERT_EQUAL( ny, y + cell_idx_c( stencil::cy[ dir ] ) );
+   WALBERLA_ASSERT_EQUAL( nz, z + cell_idx_c( stencil::cz[ dir ] ) );
+   WALBERLA_ASSERT_UNEQUAL( mask & this->mask_, numeric_cast<flag_t>(0) );
+   WALBERLA_ASSERT_EQUAL( mask & this->mask_, this->mask_ ); // only true if "this->mask_" only contains one single flag, which is the case for the
+                                                             // current implementation of this boundary condition (Dirichlet)
+
+   // Adapt RHS to Dirichlet BC //
+   rhsField_->get( x, y, z ) -= stencilField_->get( x, y, z, Stencil_T::idx[dir] ) * real_t(2) * dirichletBC_->get( nx, ny, nz ); // possibly utilize that off-diagonal entries -1 anyway
+
+   // Adapt Stencils to BCs (former adaptStencilsBC) //
+   // Only required if any new BC cell was added or the BC type of any former BC cell has been changed
+   if (numDirtyCells_>0) {
+
+      // here not thread-safe! (workaround: mutex when adapting central entry)
+      adaptBCStencilField_->get( x, y, z, Stencil_T::idx[stencil::C] ) -= adaptBCStencilField_->get( x, y, z, Stencil_T::idx[dir] );
+      adaptBCStencilField_->get( x, y, z, Stencil_T::idx[dir] ) = 0;
+
+   }
+
+}
+
+
+} // namespace pde
+} // namespace walberla
diff --git a/src/pde/boundary/Dirichlet_withDx.h b/src/pde/boundary/Dirichlet_withDx.h
new file mode 100644
index 0000000000000000000000000000000000000000..c119206926c31891a3d17b73c52e02ec1f92ee3b
--- /dev/null
+++ b/src/pde/boundary/Dirichlet_withDx.h
@@ -0,0 +1,302 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Dirichlet.h
+//! \ingroup pde
+//! \author Dominik Bartuschat <dominik.bartuschat@fau.de>
+//! \author Michael Kuron <mkuron@icp.uni-stuttgart.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "field/Field.h"
+
+#include "boundary/Boundary.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/config/Config.h"
+#include "core/debug/Debug.h"
+#include "core/logging/Logging.h"
+
+#include "field/FlagUID.h"
+
+#include "stencil/Directions.h"
+
+#include <vector>
+#include <array>
+
+
+namespace walberla {
+namespace pde {
+
+
+
+template< typename Stencil_T, typename flag_t >
+class Dirichlet : public Boundary< flag_t >
+{
+
+   typedef GhostLayerField< real_t, 1 > Field_T;
+   typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
+
+public:
+
+   static const bool threadsafe = false;
+
+   class DirichletBC : public BoundaryConfiguration {
+   public:
+             DirichletBC( const real_t & _dirichletBC ) : dirichletBC_( _dirichletBC ) {}
+      inline DirichletBC( const Config::BlockHandle & config );
+
+      const real_t & dirichletBC() const { return dirichletBC_; }
+      real_t & dirichletBC() { return dirichletBC_; }
+
+   private:
+
+      real_t dirichletBC_;
+   };
+
+   static shared_ptr<DirichletBC> createConfiguration( const Config::BlockHandle & config ) { return make_shared<DirichletBC>( config ); }
+
+
+
+   inline Dirichlet( const BoundaryUID & boundaryUID, const FlagUID & uid, Field_T* const rhsField, const StencilField_T* const stencilField,
+                     StencilField_T* const adaptBCStencilField, FlagField<flag_t> * const flagField, const StructuredBlockStorage& blocks );
+
+   void pushFlags( std::vector< FlagUID > & uids ) const { uids.push_back( uid_ ); }
+
+   void beforeBoundaryTreatment() const {}
+   void afterBoundaryTreatment();
+
+   template< typename Buffer_T >
+   inline void packCell( Buffer_T & buffer, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) const;
+
+   template< typename Buffer_T >
+   inline void registerCell( Buffer_T & buffer, const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z );
+
+   inline void registerCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const BoundaryConfiguration & dirichletBC );
+   inline void registerCells( const flag_t, const CellInterval & cells, const BoundaryConfiguration & dirichletBC );
+   template< typename CellIterator >
+   inline void registerCells( const flag_t, const CellIterator & begin, const CellIterator & end, const BoundaryConfiguration & dirichletBC );
+
+   void unregisterCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z );
+
+   inline void treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                               const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t mask );
+
+   inline const real_t & getValue( const cell_idx_t x, cell_idx_t y, cell_idx_t z ) const { return dirichletBC_->get(x,y,z); }
+
+private:
+
+   const FlagUID uid_;
+   const flag_t formerDirichlet_;
+   uint_t numDirtyCells_;
+
+   std::array<real_t, Stencil_T::Q> dx_;
+   Field_T* const                rhsField_;
+   shared_ptr< Field_T >         dirichletBC_;
+   const StencilField_T * const  stencilField_;
+   StencilField_T * const        adaptBCStencilField_;
+   FlagField<flag_t> * const     flagField_;
+
+}; // class Dirichlet
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline Dirichlet< Stencil_T, flag_t >::DirichletBC::DirichletBC( const Config::BlockHandle & config  )
+{
+   dirichletBC_ = ( config && config.isDefined( "val" ) ) ? config.getParameter<real_t>( "val" ) : real_c(0.0);
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline Dirichlet< Stencil_T, flag_t >::Dirichlet( const BoundaryUID & boundaryUID, const FlagUID & uid, Field_T* const rhsField, const StencilField_T* const stencilField, StencilField_T* const adaptBCStencilField, FlagField<flag_t> * const flagField, const StructuredBlockStorage& blocks ) :
+   Boundary<flag_t>( boundaryUID ), uid_( uid ), formerDirichlet_ (flagField->getOrRegisterFlag("FormerDirichlet")), numDirtyCells_(0), rhsField_( rhsField ), stencilField_ ( stencilField ), adaptBCStencilField_ ( adaptBCStencilField ), flagField_ (flagField)
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( rhsField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( stencilField_ );
+
+   WALBERLA_ASSERT_EQUAL( rhsField_->xyzSize(), stencilField_->xyzSize() );
+
+   dirichletBC_ = make_shared< Field_T >( rhsField_->xSize(), rhsField_->ySize(), rhsField_->zSize(), uint_t(1), field::zyxf );
+
+   for(auto d = Stencil_T::beginNoCenter(); d != Stencil_T::end(); ++d ){
+      dx_[d.toIdx()] = Vector3<real_t>(stencil::cx[d.toIdx()]*blocks.dx(), stencil::cy[d.toIdx()]*blocks.dy(), stencil::cz[d.toIdx()]*blocks.dz() ).sqrLength();
+      WALBERLA_LOG_DEVEL("dx in direction " << d.dirString() << ":" << dx_[d.toIdx()]);
+   }
+
+
+}
+
+
+template< typename Stencil_T, typename flag_t >
+void Dirichlet< Stencil_T, flag_t >::afterBoundaryTreatment() {
+
+   if (numDirtyCells_>0) {
+      WALBERLA_LOG_WARNING("De-registering cells requires re-running Galerkin coarsening");
+   }
+
+   numDirtyCells_=0;
+}
+
+template< typename Stencil_T, typename flag_t >
+template< typename Buffer_T >
+inline void Dirichlet< Stencil_T, flag_t >::packCell( Buffer_T & buffer, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) const
+{
+   buffer << dirichletBC_->get( x, y, z );
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+template< typename Buffer_T >
+inline void Dirichlet< Stencil_T, flag_t >::registerCell( Buffer_T & buffer, const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
+{
+   buffer >> dirichletBC_->get( x, y, z );
+   if( flagField_->isFlagSet( x, y, z, formerDirichlet_ ) )
+   {
+      flagField_->removeFlag( x, y, z, formerDirichlet_ );
+      --numDirtyCells_;
+   }
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline void Dirichlet< Stencil_T, flag_t >::registerCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z,
+                                                                                       const BoundaryConfiguration & dirichletBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const DirichletBC * >( &dirichletBC ), &dirichletBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( dirichletBC_ );
+
+   const DirichletBC & val = dynamic_cast< const DirichletBC & >( dirichletBC );
+
+   dirichletBC_->get( x, y, z ) = val.dirichletBC();
+
+   if( flagField_->isFlagSet( x, y, z, formerDirichlet_ ) )
+   {
+      flagField_->removeFlag( x, y, z, formerDirichlet_ );
+      --numDirtyCells_;
+   }
+
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline void Dirichlet< Stencil_T, flag_t >::registerCells( const flag_t, const CellInterval & cells, const BoundaryConfiguration & dirichletBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const DirichletBC * >( &dirichletBC ), &dirichletBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( dirichletBC_ );
+
+   const DirichletBC & val = dynamic_cast< const DirichletBC & >( dirichletBC );
+
+   for( auto cell = dirichletBC_->beginSliceXYZ( cells ); cell != dirichletBC_->end(); ++cell ) {
+      *cell = val.dirichletBC();
+
+      if( flagField_->isFlagSet( cell.cell(), formerDirichlet_ ) )
+      {
+         flagField_->removeFlag( cell.cell(), formerDirichlet_ );
+         --numDirtyCells_;
+      }
+   }
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+template< typename CellIterator >
+inline void Dirichlet< Stencil_T, flag_t >::registerCells( const flag_t, const CellIterator & begin, const CellIterator & end,
+                                                                                        const BoundaryConfiguration & dirichletBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const DirichletBC * >( &dirichletBC ), &dirichletBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( dirichletBC_ );
+
+   const DirichletBC & val = dynamic_cast< const DirichletBC & >( dirichletBC );
+
+   for( auto cell = begin; cell != end; ++cell )
+   {
+      dirichletBC_->get( cell->x(), cell->y(), cell->z() ) = val.dirichletBC();
+
+      if( flagField_->isFlagSet( cell->cell(), formerDirichlet_ ) )
+      {
+         flagField_->removeFlag( cell->cell(), formerDirichlet_ );
+         --numDirtyCells_;
+      }
+   }
+
+}
+
+
+template< typename Stencil_T, typename flag_t >
+void Dirichlet< Stencil_T, flag_t >::unregisterCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
+{
+   flagField_->addFlag( x,y,z, formerDirichlet_ );
+   ++numDirtyCells_;
+
+   // Set stencil adapted to BCs back to unadapted state
+   for(auto d = Stencil_T::begin(); d != Stencil_T::end(); ++d ){
+      adaptBCStencilField_->get(x,y,z,d.toIdx()) = stencilField_->get(x,y,z,d.toIdx());
+   }
+
+}
+
+
+//*************************************************************************************************
+/*! \brief Treat one direction by adapting RHS according to Dirichlet boundary value.
+ *
+ */
+
+template< typename Stencil_T, typename flag_t >
+#ifndef NDEBUG
+inline void Dirichlet< Stencil_T, flag_t >::treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                                                                                         const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t mask )
+#else
+inline void Dirichlet< Stencil_T, flag_t >::treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                                                                                         const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t /*mask*/ )
+#endif
+{
+
+   WALBERLA_ASSERT_EQUAL( nx, x + cell_idx_c( stencil::cx[ dir ] ) );
+   WALBERLA_ASSERT_EQUAL( ny, y + cell_idx_c( stencil::cy[ dir ] ) );
+   WALBERLA_ASSERT_EQUAL( nz, z + cell_idx_c( stencil::cz[ dir ] ) );
+   WALBERLA_ASSERT_UNEQUAL( mask & this->mask_, numeric_cast<flag_t>(0) );
+   WALBERLA_ASSERT_EQUAL( mask & this->mask_, this->mask_ ); // only true if "this->mask_" only contains one single flag, which is the case for the
+                                                             // current implementation of this boundary condition (Dirichlet)
+
+   // Adapt RHS to Dirichlet BC //
+   rhsField_->get( x, y, z ) -= stencilField_->get( x, y, z, Stencil_T::idx[dir] ) * real_t(2) * dx_[ Stencil_T::idx[dir] ] * dirichletBC_->get( nx, ny, nz ); // possibly utilize that off-diagonal entries -1 anyway
+
+   // WALBERLA_LOG_DEVEL("Adapt RHS to Dirichlet value " << dirichletBC_->get( nx, ny, nz ) << " on cell " << Cell(x,y,z) << " for stencil entry " << stencilField_->get( x, y, z, Stencil_T::idx[dir] ) );
+
+   // Adapt Stencils to BCs (former adaptStencilsBC) //
+   // Only required if any new BC cell was added or the BC type of any former BC cell has been changed
+   if (numDirtyCells_>0) {
+
+      // here not thread-safe! (workaround: mutex when adapting central entry)
+      adaptBCStencilField_->get( x, y, z, Stencil_T::idx[stencil::C] ) -= adaptBCStencilField_->get( x, y, z, Stencil_T::idx[dir] );
+      adaptBCStencilField_->get( x, y, z, Stencil_T::idx[dir] ) = 0;
+
+   }
+
+}
+
+
+
+} // namespace pde
+} // namespace walberla
diff --git a/src/pde/boundary/Neumann.h b/src/pde/boundary/Neumann.h
index 42a23295512300ba83b74fe4c38ec19095b534da..ba5fb0e8c7e09ee039a793aee5f1623a10053250 100644
--- a/src/pde/boundary/Neumann.h
+++ b/src/pde/boundary/Neumann.h
@@ -15,16 +15,34 @@
 //
 //! \file Neumann.h
 //! \ingroup pde
+//! \author Dominik Bartuschat <dominik.bartuschat@fau.de>
+//! \author Michael Kuron <mkuron@icp.uni-stuttgart.de>
 //! \author Florian Schornbaum <florian.schornbaum@fau.de>
 //
 //======================================================================================================================
 
 #pragma once
 
+#include "field/Field.h"
+
+#include "boundary/Boundary.h"
+
+#include "core/DataTypes.h"
 #include "core/cell/CellInterval.h"
+#include "core/config/Config.h"
+#include "core/debug/Debug.h"
+#include "core/logging/Logging.h"
+
 #include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/FlagUID.h"
+
+#include "stencil/Directions.h"
 #include "stencil/D3Q6.h"
 
+#include <vector>
+#include <limits>
+#include <array>
 
 
 namespace walberla {
@@ -207,5 +225,305 @@ void NeumannDomainBoundary< PdeField >::apply( PdeField * p, const CellInterval
 
 
 
+//**********************************************************************************************************************
+/*!
+*   \brief Neumann boundary handling for PDEs
+*
+*   This boundary condition imposes a Neumann condition with arbitrary values on a PDE.
+*   It does so by modifying the right-hand side and the stencil field.
+*   Anything that has internal copies of the stencil field (e.g. the multigrid V-cycle's coarsened stencils) is
+*   responsible for updating its copies when boundary conditions are changed.
+*
+*   \tparam Stencil_T The stencil used for the discrete operator
+*   \tparam flag_t The integer type used by the flag field
+*/
+//**********************************************************************************************************************
+template< typename Stencil_T, typename flag_t >
+class Neumann : public Boundary< flag_t >
+{
+
+   typedef GhostLayerField< real_t, 1 > Field_T;
+   typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
+
+public:
+
+   static const bool threadsafe = false;
+
+   class NeumannBC : public BoundaryConfiguration {
+   public:
+             NeumannBC( const real_t & _neumannBC ) : neumannBC_( _neumannBC ) {}
+      inline NeumannBC( const Config::BlockHandle & config );
+
+      const real_t & neumannBC() const { return neumannBC_; }
+      real_t & neumannBC() { return neumannBC_; }
+
+   private:
+
+      real_t neumannBC_;
+   };
+
+   static shared_ptr<NeumannBC> createConfiguration( const Config::BlockHandle & config ) { return make_shared<NeumannBC>( config ); }
+
+
+  //*******************************************************************************************************************
+  /*! Creates a Neumann boundary
+   * \param boundaryUID the UID of the boundary condition
+   * \param uid the UID of the flag that marks cells with this boundary condition
+   * \param rhsField pointer to the right-hand side field, which will be adapted by this boundary condition
+   * \param stencilField pointer to the operator stencil field. It should contain the stencil weights that don't take
+   *                     into account the boundary conditions.
+   * \param adaptBCStencilField pointer to the operator stencil field that will be adapted by this boundary condition. 
+   *                            Initially, this field needs to contain the same values as \p stencilField.
+   *                            This is the stencil field that should be passed to the actual PDE solver.
+   * \param flagField pointer to the flag field
+   * \param blocks
+   *******************************************************************************************************************/
+   inline Neumann( const BoundaryUID & boundaryUID, const FlagUID & uid, Field_T* const rhsField, const StencilField_T* const stencilField,
+                     StencilField_T* const adaptBCStencilField, FlagField<flag_t> * const flagField, const StructuredBlockStorage& blocks );
+
+   void pushFlags( std::vector< FlagUID > & uids ) const { uids.push_back( uid_ ); }
+
+   void beforeBoundaryTreatment() const {}
+   void afterBoundaryTreatment();
+
+   template< typename Buffer_T >
+   inline void packCell( Buffer_T & buffer, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) const;
+
+   template< typename Buffer_T >
+   inline void registerCell( Buffer_T & buffer, const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z );
+
+   inline void registerCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const BoundaryConfiguration & neumannBC );
+   inline void registerCells( const flag_t, const CellInterval & cells, const BoundaryConfiguration & neumannBC );
+   template< typename CellIterator >
+   inline void registerCells( const flag_t, const CellIterator & begin, const CellIterator & end, const BoundaryConfiguration & neumannBC );
+
+   void unregisterCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z );
+
+   inline void treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                               const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t mask );
+
+   inline const real_t & getValue( const cell_idx_t x, cell_idx_t y, cell_idx_t z ) const { return neumannBC_->get(x,y,z); }
+
+private:
+
+   const FlagUID uid_;
+   const flag_t formerNeumann_;
+   uint_t numDirtyCells_;
+
+   std::array<real_t, Stencil_T::Q> dx_;
+   Field_T* const                rhsField_;
+   shared_ptr< Field_T >         neumannBC_;
+   const StencilField_T * const  stencilField_;
+   StencilField_T * const        adaptBCStencilField_;
+   FlagField<flag_t> * const     flagField_;
+
+}; // class Neumann
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline Neumann< Stencil_T, flag_t >::NeumannBC::NeumannBC( const Config::BlockHandle & config  )
+{
+   neumannBC_ = ( config && config.isDefined( "val" ) ) ? config.getParameter<real_t>( "val" ) : real_c(0.0);
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline Neumann< Stencil_T, flag_t >::Neumann( const BoundaryUID & boundaryUID, const FlagUID & uid, Field_T* const rhsField, const StencilField_T* const stencilField,
+                                              StencilField_T* const adaptBCStencilField, FlagField<flag_t> * const flagField, const StructuredBlockStorage& blocks  )
+                                              : Boundary<flag_t>( boundaryUID ), uid_( uid ), formerNeumann_ (flagField->getOrRegisterFlag("FormerNeumann")), numDirtyCells_(std::numeric_limits<uint_t>::max()),
+                                                rhsField_( rhsField ), stencilField_ ( stencilField ), adaptBCStencilField_ ( adaptBCStencilField ), flagField_ (flagField)
+{
+   WALBERLA_ASSERT_NOT_NULLPTR( rhsField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( stencilField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( adaptBCStencilField_ );
+   WALBERLA_ASSERT_NOT_NULLPTR( flagField_ );
+
+   WALBERLA_ASSERT_EQUAL( rhsField_->xyzSize(), stencilField_->xyzSize() );
+#ifndef NDEBUG
+   WALBERLA_FOR_ALL_CELLS_XYZ( stencilField_,
+      for( auto dir = Stencil_T::begin(); dir != Stencil_T::end(); ++dir )
+      {
+         WALBERLA_ASSERT_IDENTICAL(stencilField_->get(x,y,z, dir.toIdx()), adaptBCStencilField_->get(x,y,z, dir.toIdx()));
+      }
+   )
+#endif
+
+   neumannBC_ = make_shared< Field_T >( rhsField_->xSize(), rhsField_->ySize(), rhsField_->zSize(), uint_t(1), field::zyxf );
+
+   for(auto d = Stencil_T::beginNoCenter(); d != Stencil_T::end(); ++d ){
+      dx_[d.toIdx()] = Vector3<real_t>(real_c(stencil::cx[d.toIdx()])*blocks.dx(), real_c(stencil::cy[d.toIdx()])*blocks.dy(), real_c(stencil::cz[d.toIdx()])*blocks.dz() ).sqrLength();
+      // WALBERLA_LOG_DEVEL("dx in direction " << d.dirString() << ":" << dx_[d.toIdx()]);
+   }
+
+}
+
+
+template< typename Stencil_T, typename flag_t >
+void Neumann< Stencil_T, flag_t >::afterBoundaryTreatment() {
+
+   if (numDirtyCells_>0 && numDirtyCells_ != std::numeric_limits<uint_t>::max()) {
+      WALBERLA_LOG_WARNING("De-registering cells requires re-running Galerkin coarsening");
+   }
+
+   numDirtyCells_=0;
+}
+
+template< typename Stencil_T, typename flag_t >
+template< typename Buffer_T >
+inline void Neumann< Stencil_T, flag_t >::packCell( Buffer_T & buffer, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) const
+{
+   buffer << neumannBC_->get( x, y, z );
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+template< typename Buffer_T >
+inline void Neumann< Stencil_T, flag_t >::registerCell( Buffer_T & buffer, const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
+{
+   buffer >> neumannBC_->get( x, y, z );
+   if( flagField_->isFlagSet( x, y, z, formerNeumann_ ) )
+   {
+      flagField_->removeFlag( x, y, z, formerNeumann_ );
+      --numDirtyCells_;
+   }
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline void Neumann< Stencil_T, flag_t >::registerCell( const flag_t, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z,
+                                                                                       const BoundaryConfiguration & neumannBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const NeumannBC * >( &neumannBC ), &neumannBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( neumannBC_ );
+
+   const NeumannBC & val = dynamic_cast< const NeumannBC & >( neumannBC );
+
+   neumannBC_->get( x, y, z ) = val.neumannBC();
+
+   if( flagField_->isFlagSet( x, y, z, formerNeumann_ ) )
+   {
+      flagField_->removeFlag( x, y, z, formerNeumann_ );
+      --numDirtyCells_;
+   }
+
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+inline void Neumann< Stencil_T, flag_t >::registerCells( const flag_t, const CellInterval & cells, const BoundaryConfiguration & neumannBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const NeumannBC * >( &neumannBC ), &neumannBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( neumannBC_ );
+
+   const NeumannBC & val = dynamic_cast< const NeumannBC & >( neumannBC );
+
+   for( auto cell = neumannBC_->beginSliceXYZ( cells ); cell != neumannBC_->end(); ++cell ) {
+      *cell = val.neumannBC();
+
+      if( flagField_->isFlagSet( cell.cell(), formerNeumann_ ) )
+      {
+         flagField_->removeFlag( cell.cell(), formerNeumann_ );
+         --numDirtyCells_;
+      }
+   }
+}
+
+
+
+template< typename Stencil_T, typename flag_t >
+template< typename CellIterator >
+inline void Neumann< Stencil_T, flag_t >::registerCells( const flag_t, const CellIterator & begin, const CellIterator & end,
+                                                                                        const BoundaryConfiguration & neumannBC )
+{
+   WALBERLA_ASSERT_EQUAL( dynamic_cast< const NeumannBC * >( &neumannBC ), &neumannBC );
+   WALBERLA_ASSERT_NOT_NULLPTR( neumannBC_ );
+
+   const NeumannBC & val = dynamic_cast< const NeumannBC & >( neumannBC );
+
+   for( auto cell = begin; cell != end; ++cell )
+   {
+      neumannBC_->get( cell->x(), cell->y(), cell->z() ) = val.neumannBC();
+
+      if( flagField_->isFlagSet( cell->cell(), formerNeumann_ ) )
+      {
+         flagField_->removeFlag( cell->cell(), formerNeumann_ );
+         --numDirtyCells_;
+      }
+   }
+
+}
+
+// Remark: This unregister function works only properly for D3Q7 stencils and convex domains!
+template< typename Stencil_T, typename flag_t >
+void Neumann< Stencil_T, flag_t >::unregisterCell( const flag_t, const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz )
+{
+   flagField_->addFlag( nx, ny, nz, formerNeumann_ );
+   ++numDirtyCells_;
+
+   Cell boundaryCell( nx, ny, nz );
+
+   // Set stencil previously adapted to Neumann BC back to un-adapted state
+   for( auto d = Stencil_T::begin(); d != Stencil_T::end(); ++d )
+   {
+      Cell domainCell = boundaryCell - *d;
+      if( adaptBCStencilField_->isInInnerPart( domainCell ) )
+      {
+         // restore original non-center stencil entry of neighboring non-boundary cell
+         adaptBCStencilField_->get( domainCell, d.toIdx() ) = stencilField_->get( domainCell, d.toIdx() );
+
+         // restore original center stencil entry of neighboring non-boundary cell
+         adaptBCStencilField_->get( domainCell, Stencil_T::idx[stencil::C] ) -=  adaptBCStencilField_->get( domainCell, d.toIdx() );
+
+      }
+   }
+
+}
+
+
+//*************************************************************************************************
+/*! \brief Treat one direction by adapting RHS according to Neumann boundary value.
+ *
+ */
+
+template< typename Stencil_T, typename flag_t >
+#ifndef NDEBUG
+inline void Neumann< Stencil_T, flag_t >::treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                                                                                         const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t mask )
+#else
+inline void Neumann< Stencil_T, flag_t >::treatDirection( const cell_idx_t  x, const cell_idx_t  y, const cell_idx_t  z, const stencil::Direction dir,
+                                                                                         const cell_idx_t nx, const cell_idx_t ny, const cell_idx_t nz, const flag_t /*mask*/ )
+#endif
+{
+
+   WALBERLA_ASSERT_EQUAL( nx, x + cell_idx_c( stencil::cx[ dir ] ) );
+   WALBERLA_ASSERT_EQUAL( ny, y + cell_idx_c( stencil::cy[ dir ] ) );
+   WALBERLA_ASSERT_EQUAL( nz, z + cell_idx_c( stencil::cz[ dir ] ) );
+   WALBERLA_ASSERT_UNEQUAL( mask & this->mask_, numeric_cast<flag_t>(0) );
+   WALBERLA_ASSERT_EQUAL( mask & this->mask_, this->mask_ ); // only true if "this->mask_" only contains one single flag, which is the case for the
+                                                             // current implementation of this boundary condition (Neumann)
+
+   // WALBERLA_LOG_DEVEL("Neumann treatment at: " << Cell(x,y,z));
+
+   // Adapt RHS to Neumann BC //
+   rhsField_->get( x, y, z ) -= stencilField_->get( x, y, z, Stencil_T::idx[dir] ) * dx_[ Stencil_T::idx[dir] ] * neumannBC_->get( nx, ny, nz ); // possibly utilize that off-diagonal entries -1 anyway
+
+   // Adapt Stencils to BCs (former adaptStencilsBC) //
+   // Only required if any new BC cell was added or the BC type of any former BC cell has been changed
+   if (numDirtyCells_>0) {
+
+      // here not thread-safe! (workaround: mutex when adapting central entry)
+      adaptBCStencilField_->get( x, y, z, Stencil_T::idx[stencil::C] ) += adaptBCStencilField_->get( x, y, z, Stencil_T::idx[dir] );
+      adaptBCStencilField_->get( x, y, z, Stencil_T::idx[dir] ) = 0;
+
+   }
+
+}
+
+
 } // namespace pde
 } // namespace walberla
diff --git a/src/pde/boundary/all.h b/src/pde/boundary/all.h
index 16d35f8642b56d4147c7dd6472234fcf343e433d..9e392663e08bda508eab068879508b8c3579d5b3 100644
--- a/src/pde/boundary/all.h
+++ b/src/pde/boundary/all.h
@@ -22,4 +22,5 @@
 
 #pragma once
 
+#include "Dirichlet.h"
 #include "Neumann.h"
diff --git a/tests/pde/BoundaryTest.cpp b/tests/pde/BoundaryTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..17e6b05a3815621c94360ecc2700bd9c5be7a512
--- /dev/null
+++ b/tests/pde/BoundaryTest.cpp
@@ -0,0 +1,445 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BoundaryTest.cpp
+//! \ingroup pde
+//! \author Dominik Bartuschat <dominik.bartuschat@fau.de>
+//! \author Michael Kuron <mkuron@icp.uni-stuttgart.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "boundary/BoundaryHandling.h"
+
+#include "core/Abort.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/mpi/Environment.h"
+#include "core/mpi/MPIManager.h"
+
+#include "field/AddToStorage.h"
+#include "field/GhostLayerField.h"
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "pde/iterations/CGFixedStencilIteration.h"
+#include "pde/iterations/CGIteration.h"
+#include "pde/boundary/Dirichlet.h"
+#include "pde/boundary/Neumann.h"
+
+#include "stencil/D2Q5.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "vtk/VTKOutput.h"
+
+#include <cmath>
+
+namespace walberla {
+
+
+
+typedef GhostLayerField< real_t, 1 > Field_T;
+typedef stencil::D2Q5                Stencil_T;
+typedef pde::CGIteration<Stencil_T>::StencilField_T  StencilField_T;
+
+typedef walberla::uint8_t      flag_t;
+typedef FlagField < flag_t >   FlagField_T;
+typedef pde::Dirichlet< Stencil_T, flag_t >  Dirichlet_T;
+typedef pde::Neumann< Stencil_T, flag_t >  Neumann_T;
+
+typedef boost::tuples::tuple< Dirichlet_T, Neumann_T >  BoundaryConditions_T;
+
+typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T;
+
+
+const FlagUID  Domain_Flag( "domain" );
+const FlagUID  Dirichlet_Flag( "dirichlet" );
+const FlagUID  Neumann_Flag( "neumann" );
+
+
+
+///////////////////////
+// BOUNDARY HANDLING //
+///////////////////////
+
+//**********************************************************************************************************************
+/*!
+*   \brief Functor that is used to add a boundary handling object to each block
+*
+*   The member function "BoundaryHandling_T * operator()( IBlock* const block ) const" is called by the framework in
+*   order to add a boundary handling object of type 'BoundaryHandling_T' to each block.
+*/
+//**********************************************************************************************************************
+
+class MyBoundaryHandling
+{
+public:
+
+   MyBoundaryHandling( const BlockDataID & rhsFieldId, const BlockDataID & stencilFieldId, const BlockDataID & adaptBCStencilFieldId, const BlockDataID & flagFieldId, const StructuredBlockStorage& blocks ) :
+      rhsFieldId_( rhsFieldId ), stencilFieldId_ ( stencilFieldId ), adaptBCStencilFieldId_ ( adaptBCStencilFieldId ), flagFieldId_ (flagFieldId), blockStorage_(blocks) {}
+
+   BoundaryHandling_T * operator()( IBlock* const block ) const;
+
+private:
+
+   const BlockDataID rhsFieldId_;
+   const BlockDataID stencilFieldId_;
+   const BlockDataID adaptBCStencilFieldId_;
+   const BlockDataID flagFieldId_;
+   const StructuredBlockStorage & blockStorage_;
+
+}; // class MyBoundaryHandling
+
+BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block ) const
+{
+
+   Field_T * rhsField = block->getData< Field_T >( rhsFieldId_ );
+   StencilField_T * stencilField = block->getData< StencilField_T >( stencilFieldId_ );
+   StencilField_T * adaptStencilField = block->getData< StencilField_T >( adaptBCStencilFieldId_ );
+   FlagField_T * flagField = block->getData< FlagField_T >( flagFieldId_ );
+
+   const flag_t domain = flagField->registerFlag( Domain_Flag ); // register the fluid flag at the flag field
+
+   // A new boundary handling instance that uses the just fetched flag field is created:
+   // Additional, internal flags used by the boundary handling will be stored in this flag field.
+   return new BoundaryHandling_T( "boundary handling", flagField, domain,
+         boost::tuples::make_tuple( Dirichlet_T( "dirichlet", Dirichlet_Flag, rhsField, stencilField, adaptStencilField, flagField ) ,
+                                    Neumann_T( "neumann", Neumann_Flag, rhsField, stencilField, adaptStencilField, flagField, blockStorage_ )
+         )
+   );
+}
+
+
+
+void initRHS( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & rhsId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      Field_T * rhs = block->getData< Field_T >( rhsId );
+      CellInterval xyz = rhs->xyzSize();
+      for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+      {
+         const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+         rhs->get( *cell ) = real_t(4) * math::PI * math::PI * std::sin( real_t(2) * math::PI * p[0] ) * std::sinh( real_t(2) * math::PI * p[1] );
+      }
+   }
+}
+
+
+
+void setRHSConstValue( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & rhsId, const real_t value )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      Field_T * rhs = block->getData< Field_T >( rhsId );
+      CellInterval xyz = rhs->xyzSize();
+      for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+      {
+         rhs->get( *cell ) = value;
+      }
+   }
+}
+
+
+
+void setBoundaryConditionsDirichl( shared_ptr< StructuredBlockForest > & blocks, const BlockDataID & boundaryHandlingId )
+{
+   CellInterval domainBBInGlobalCellCoordinates = blocks->getDomainCellBB();
+   domainBBInGlobalCellCoordinates.expand(Cell(1,1,0));
+
+   // Iterate through all blocks that are allocated on this process and part of the block structure 'blocks'
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+
+      BoundaryHandling_T * boundaryHandling = block->getData< BoundaryHandling_T >( boundaryHandlingId );
+
+      CellInterval domainBB( domainBBInGlobalCellCoordinates );
+      blocks->transformGlobalToBlockLocalCellInterval( domainBB, *block );
+
+      CellInterval west( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMin(), domainBB.yMax(), domainBB.zMax() );
+      CellInterval east( domainBB.xMax(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
+      CellInterval south( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMin(), domainBB.zMax() );
+      CellInterval north( domainBB.xMin(), domainBB.yMax(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
+
+      // Set north boundary to defined function
+      for( auto cell = north.begin(); cell != north.end(); ++cell )
+      {
+
+         const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+         real_t val = std::sin( real_t( 2 ) * math::PI * p[0] ) * std::sinh( real_t( 2 ) * math::PI * p[1] );
+
+         boundaryHandling->forceBoundary( Dirichlet_Flag, cell->x(), cell->y(), cell->z(), pde::Dirichlet< Stencil_T, flag_t >::DirichletBC( val ) );
+
+      }
+
+      // Set all other boundaries to zero
+      for( auto & ci : { west, east, south } )
+      {
+         boundaryHandling->forceBoundary( Dirichlet_Flag, ci, pde::Dirichlet< Stencil_T, flag_t >::DirichletBC( real_t( 0 ) ) );
+      }
+
+      // All the remaining cells need to be marked as being fluid. The 'fillWithDomain' call does just that.
+      boundaryHandling->fillWithDomain( domainBB );
+   }
+}
+
+
+
+void setBoundaryConditionsMixed( shared_ptr< StructuredBlockForest > & blocks, const BlockDataID & boundaryHandlingId )
+{
+   CellInterval domainBBInGlobalCellCoordinates = blocks->getDomainCellBB();
+   domainBBInGlobalCellCoordinates.expand(Cell(1,1,0));
+
+   // Iterate through all blocks that are allocated on this process and part of the block structure 'blocks'
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+
+      BoundaryHandling_T * boundaryHandling = block->getData< BoundaryHandling_T >( boundaryHandlingId );
+
+      CellInterval domainBB( domainBBInGlobalCellCoordinates );
+      blocks->transformGlobalToBlockLocalCellInterval( domainBB, *block );
+
+      CellInterval west( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMin(), domainBB.yMax(), domainBB.zMax() );
+      CellInterval east( domainBB.xMax(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
+      CellInterval south( domainBB.xMin(), domainBB.yMin(), domainBB.zMin(), domainBB.xMax(), domainBB.yMin(), domainBB.zMax() );
+      CellInterval north( domainBB.xMin(), domainBB.yMax(), domainBB.zMin(), domainBB.xMax(), domainBB.yMax(), domainBB.zMax() );
+
+      // Set north boundary to large value
+      for( auto cell = north.begin(); cell != north.end(); ++cell )
+      {
+
+         const real_t val = real_t(2);
+         boundaryHandling->forceBoundary( Dirichlet_Flag, cell->x(), cell->y(), cell->z(), pde::Dirichlet< Stencil_T, flag_t >::DirichletBC( val ) );
+
+      }
+
+      // Set south boundary to large value
+      for( auto cell = south.begin(); cell != south.end(); ++cell )
+      {
+
+         const real_t val = real_t(-2);
+         boundaryHandling->forceBoundary( Dirichlet_Flag, cell->x(), cell->y(), cell->z(), pde::Dirichlet< Stencil_T, flag_t >::DirichletBC( val ) );
+
+      }
+
+      // Set all other boundaries to homogeneous Neumann BCs
+      for( auto & ci : { west, east} )
+      {
+         boundaryHandling->forceBoundary( Neumann_Flag, ci, pde::Neumann< Stencil_T, flag_t >::NeumannBC( real_t( 0 ) ) );
+      }
+
+      // All the remaining cells need to be marked as being fluid. The 'fillWithDomain' call does just that.
+      boundaryHandling->fillWithDomain( domainBB );
+   }
+}
+
+
+
+void copyWeightsToStencilField( const shared_ptr< StructuredBlockStorage > & blocks, const std::vector<real_t> & weights, const BlockDataID & stencilId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      StencilField_T * stencil = block->getData< StencilField_T >( stencilId );
+      
+      WALBERLA_FOR_ALL_CELLS_XYZ(stencil,
+         for( auto dir = Stencil_T::begin(); dir != Stencil_T::end(); ++dir )
+            stencil->get(x,y,z,dir.toIdx()) = weights[ dir.toIdx() ];
+      );
+   }
+}
+
+
+
+
+int main( int argc, char** argv )
+{
+   debug::enterTestMode();
+
+   mpi::Environment env( argc, argv );
+
+   const uint_t processes = uint_c( MPIManager::instance()->numProcesses() );
+   if( processes != uint_t(1) && processes != uint_t(4) && processes != uint_t(8) )
+      WALBERLA_ABORT( "The number of processes must be equal to 1, 4, or 8!" );
+
+   logging::Logging::printHeaderOnStream();
+   WALBERLA_ROOT_SECTION() { logging::Logging::instance()->setLogLevel( logging::Logging::PROGRESS ); }
+
+   bool shortrun = false;
+   for( int i = 1; i < argc; ++i )
+      if( std::strcmp( argv[i], "--shortrun" ) == 0 ) shortrun = true;
+
+   const uint_t xBlocks = ( processes == uint_t(1) ) ? uint_t(1) : ( ( processes == uint_t(4) ) ? uint_t(2) : uint_t(4) );
+   const uint_t yBlocks = ( processes == uint_t(1) ) ? uint_t(1) : uint_t(2);
+   const uint_t xCells = ( processes == uint_t(1) ) ? uint_t(200) : ( ( processes == uint_t(4) ) ? uint_t(100) : uint_t(50) );
+   const uint_t yCells = ( processes == uint_t(1) ) ? uint_t(100) : uint_t(50);
+   const real_t xSize = real_t(2);
+   const real_t ySize = real_t(1);
+   const real_t dx = xSize / real_c( xBlocks * xCells + uint_t(1) );
+   const real_t dy = ySize / real_c( yBlocks * yCells + uint_t(1) );
+   auto blocks = blockforest::createUniformBlockGrid( math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
+                                                                  xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
+                                                      xBlocks, yBlocks, uint_t(1),
+                                                      xCells, yCells, uint_t(1),
+                                                      true,
+                                                      false, false, false );
+
+   BlockDataID solId = field::addToStorage< Field_T >( blocks, "sol", real_t(0), field::zyxf, uint_t(1) );
+   BlockDataID rId = field::addToStorage< Field_T >( blocks, "r", real_t(0), field::zyxf, uint_t(1) );
+   BlockDataID dId = field::addToStorage< Field_T >( blocks, "d", real_t(0), field::zyxf, uint_t(1) );
+   BlockDataID zId = field::addToStorage< Field_T >( blocks, "z", real_t(0), field::zyxf, uint_t(1) );
+
+   BlockDataID rhsId = field::addToStorage< Field_T >( blocks, "rhs", real_t(0), field::zyxf, uint_t(1) );
+
+   initRHS( blocks, rhsId );
+
+   blockforest::communication::UniformBufferedScheme< Stencil_T > synchronizeD( blocks );
+   synchronizeD.addPackInfo( make_shared< field::communication::PackInfo< Field_T > >( dId ) );
+
+   std::vector< real_t > weights( Stencil_T::Size );
+   weights[ Stencil_T::idx[ stencil::C ] ] = real_t(2) / ( blocks->dx() * blocks->dx() ) + real_t(2) / ( blocks->dy() * blocks->dy() ) + real_t(4) * math::PI * math::PI;
+   weights[ Stencil_T::idx[ stencil::N ] ] = real_t(-1) / ( blocks->dy() * blocks->dy() );
+   weights[ Stencil_T::idx[ stencil::S ] ] = real_t(-1) / ( blocks->dy() * blocks->dy() );
+   weights[ Stencil_T::idx[ stencil::E ] ] = real_t(-1) / ( blocks->dx() * blocks->dx() );
+   weights[ Stencil_T::idx[ stencil::W ] ] = real_t(-1) / ( blocks->dx() * blocks->dx() );
+
+
+   BlockDataID stencilId = field::addToStorage< StencilField_T >( blocks, "w" );
+   BlockDataID stencilBCadaptedId = field::addToStorage< StencilField_T >( blocks, "w" );
+
+   BlockDataID flagId = field::addFlagFieldToStorage< FlagField_T >( blocks, "flagField" );
+
+   copyWeightsToStencilField( blocks, weights, stencilId );
+   copyWeightsToStencilField( blocks, weights, stencilBCadaptedId );
+
+   BlockDataID boundaryHandlingId = blocks->addBlockData< BoundaryHandling_T >( MyBoundaryHandling( rhsId, stencilId, stencilBCadaptedId, flagId, *blocks ),
+                                                                                "boundary handling" );
+
+   // Test Dirichlet BCs //
+   setBoundaryConditionsDirichl( blocks, boundaryHandlingId );
+
+   
+   SweepTimeloop timeloop( blocks, uint_t(2) );
+
+   timeloop.add()
+            << Sweep( BoundaryHandling_T::getBlockSweep( boundaryHandlingId ), "boundary handling" )
+            << AfterFunction(
+                     pde::CGIteration< Stencil_T >( blocks->getBlockStorage(), solId, rId, dId, zId, rhsId, stencilBCadaptedId,
+                                                    shortrun ? uint_t( 10 ) : uint_t( 10000 ), synchronizeD, real_c( 1e-6 ) ), "CG iteration" );
+
+   timeloop.singleStep();
+
+   // Check values for Dirichlet BCs //
+   if( !shortrun )
+   {
+      Cell cellNearBdry( 75, 2, 0 );
+      real_t solNearBdry( real_c(-0.16347) );
+      Cell cellNearBdryLrg( 24, 95, 0 );
+      real_t solNearBdryLrg( real_c(201.47) );
+      Cell cellDomCentr( 100, 50, 0 );
+      real_t solDomCentr( real_c(0.37587) );
+
+      for( auto block = blocks->begin(); block != blocks->end(); ++block )
+      {
+         Field_T * sol = block->getData < Field_T > ( solId );
+         if( blocks->getBlockCellBB( *block ).contains( cellNearBdry ) )
+         {
+            blocks->transformGlobalToBlockLocalCell(cellNearBdry,*block);
+            // WALBERLA_LOG_DEVEL( "Solution value at cell " << cellNearBdry << ": " << sol->get(cellNearBdry) );
+            WALBERLA_CHECK_LESS( std::fabs( solNearBdry - sol->get( cellNearBdry ) ) / solNearBdry, 0.0001, "Invalid value in cell " << cellNearBdry );
+         }
+         if( blocks->getBlockCellBB( *block ).contains( cellNearBdryLrg ) )
+         {
+            blocks->transformGlobalToBlockLocalCell(cellNearBdryLrg,*block);
+            // WALBERLA_LOG_DEVEL( "Solution value at cell " << cellNearBdryLrg << ": " << sol->get(cellNearBdryLrg) );
+            WALBERLA_CHECK_LESS( std::fabs( solNearBdryLrg - sol->get( cellNearBdryLrg ) ) / solNearBdryLrg, 0.0001, "Invalid value in cell " << cellNearBdryLrg );
+         }
+         if( blocks->getBlockCellBB( *block ).contains( cellDomCentr ) )
+         {
+            blocks->transformGlobalToBlockLocalCell(cellDomCentr,*block);
+            // WALBERLA_LOG_DEVEL( "Solution value at cell " << cellDomCentr << ": " << sol->get(cellDomCentr) );
+            WALBERLA_CHECK_LESS( std::fabs( solDomCentr - sol->get( cellDomCentr ) ) / solDomCentr, 0.0001, "Invalid value in cell " << cellDomCentr );
+         }
+      }
+   }
+   else
+   {
+      Cell cellNearBdry( 75, 2, 0 );
+      real_t solNearBdry( real_c(-0.008355) );
+      Cell cellNearBdryLrg( 24, 95, 0 );
+      real_t solNearBdryLrg( real_c(132.188) );
+      Cell cellDomCentr( 100, 50, 0 );
+      real_t solDomCentr( 0.017603f );
+
+      for( auto block = blocks->begin(); block != blocks->end(); ++block )
+      {
+         Field_T * sol = block->getData < Field_T > ( solId );
+         if( blocks->getBlockCellBB( *block ).contains( cellNearBdry ) )
+         {
+             blocks->transformGlobalToBlockLocalCell(cellNearBdry,*block);
+             // WALBERLA_LOG_DEVEL( "Solution value at cell " << cellNearBdry << ": " << sol->get(cellNearBdry) );
+             WALBERLA_CHECK_LESS( std::fabs( solNearBdry - sol->get( cellNearBdry ) ) / solNearBdry, 0.0001, "Invalid value in cell " << cellNearBdry );
+         }
+         if( blocks->getBlockCellBB( *block ).contains( cellNearBdryLrg ) )
+         {
+             blocks->transformGlobalToBlockLocalCell(cellNearBdryLrg,*block);
+             // WALBERLA_LOG_DEVEL( "Solution value at cell " << cellNearBdryLrg << ": " << sol->get(cellNearBdryLrg) );
+             WALBERLA_CHECK_LESS( std::fabs( solNearBdryLrg - sol->get( cellNearBdryLrg ) ) / solNearBdryLrg, 0.0001, "Invalid value in cell " << cellNearBdryLrg );
+         }
+         if( blocks->getBlockCellBB( *block ).contains( cellDomCentr ) )
+         {
+             blocks->transformGlobalToBlockLocalCell(cellDomCentr,*block);
+             // WALBERLA_LOG_DEVEL( "Solution value at cell " << cellDomCentr << ": " << sol->get(cellDomCentr) );
+             WALBERLA_CHECK_LESS( std::fabs( solDomCentr - sol->get( cellDomCentr ) ) / solDomCentr, 0.0001, "Invalid value in cell " << cellDomCentr );
+         }
+      }
+
+   }
+
+   if( !shortrun )
+   {
+      vtk::writeDomainDecomposition( blocks );
+      field::createVTKOutput< Field_T >( solId, *blocks, "solution_Dirich" )();
+   }
+
+
+   // Test Mixed BCs and re-setting BCs //
+   setBoundaryConditionsMixed( blocks, boundaryHandlingId );
+   // set RHS to zero to get 'ramp' as solution
+   setRHSConstValue( blocks, rhsId, real_t(0));
+
+   timeloop.singleStep();
+
+   // Check values for mixed  BCs //
+   // TODO
+
+   if( !shortrun )
+   {
+      field::createVTKOutput< Field_T >( solId, *blocks, "solution_Mixed" )();
+//      field::createVTKOutput< Field_T >( rhsId, *blocks, "rhs_Mixed" )();
+//      field::createVTKOutput< StencilField_T >( stencilId, *blocks, "originalStencils_Mixed" )();
+//      field::createVTKOutput< StencilField_T >( stencilBCadaptedId, *blocks, "adaptedStencils_Mixed" )();
+   }
+
+   logging::Logging::printFooterOnStream();
+   return EXIT_SUCCESS;
+}
+}
+
+int main( int argc, char* argv[] )
+{
+  return walberla::main( argc, argv );
+}
diff --git a/tests/pde/CMakeLists.txt b/tests/pde/CMakeLists.txt
index 58845042e3ce5173ec1da1baa1a03f03c0279464..5dde027cce3295929ba57314b00a8494a11000a3 100644
--- a/tests/pde/CMakeLists.txt
+++ b/tests/pde/CMakeLists.txt
@@ -22,4 +22,8 @@ waLBerla_execute_test( NAME MGShortTest COMMAND $<TARGET_FILE:MGTest> --shortrun
 waLBerla_execute_test( NAME MGTest COMMAND $<TARGET_FILE:MGTest> PROCESSES 8 CONFIGURATIONS Release RelWithDbgInfo )
 
 waLBerla_compile_test( FILES MGConvergenceTest.cpp DEPENDS blockforest timeloop vtk )
-waLBerla_execute_test( NAME MGConvergenceTest COMMAND $<TARGET_FILE:MGConvergenceTest> PROCESSES 8 CONFIGURATIONS Release RelWithDbgInfo )
\ No newline at end of file
+waLBerla_execute_test( NAME MGConvergenceTest COMMAND $<TARGET_FILE:MGConvergenceTest> PROCESSES 8 CONFIGURATIONS Release RelWithDbgInfo )
+
+waLBerla_compile_test( FILES BoundaryTest.cpp DEPENDS blockforest timeloop vtk boundary )
+waLBerla_execute_test( NAME BoundaryShortTest COMMAND $<TARGET_FILE:BoundaryTest> --shortrun PROCESSES 8 )
+waLBerla_execute_test( NAME BoundaryTest COMMAND $<TARGET_FILE:BoundaryTest> PROCESSES 8 CONFIGURATIONS Release RelWithDbgInfo )