diff --git a/src/pde/iterations/VCycles.h b/src/pde/iterations/VCycles.h
index eca2b908fc5420cb24b1889e0bba71c2c2cbe86d..1d6f12fa61b1d5225cbd053584d9830139a819a2 100644
--- a/src/pde/iterations/VCycles.h
+++ b/src/pde/iterations/VCycles.h
@@ -32,6 +32,7 @@
 
 #include "pde/sweeps/RBGSFixedStencil.h"
 #include "pde/sweeps/RBGS.h"
+#include "pde/sweeps/Multigrid.h"
 
 #include <functional>
 
@@ -41,7 +42,21 @@
 namespace walberla {
 namespace pde {
 
-template< typename Stencil_T >
+//**********************************************************************************************************************
+/*!
+ *   \brief Class for multigrid V-cycle
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ *   \tparam OperatorCoarsening_T The coarsening operator to use, defaults to direct coarsening
+ *   \tparam Restrict_T The restriction operator to use
+ *   \tparam ProlongateAndCorrect_T The prolongation and correction operator to use
+ */
+//**********************************************************************************************************************
+template< typename Stencil_T,
+          typename OperatorCoarsening_T = CoarsenStencilFieldsDCA<Stencil_T>,
+          typename Restrict_T = Restrict<Stencil_T>,
+          typename ProlongateAndCorrect_T = ProlongateAndCorrect<Stencil_T>
+        >
 class VCycles
 {
 public:
@@ -50,6 +65,21 @@ public:
    typedef std::vector< real_t  > Weight_T;
    typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
 
+   //*******************************************************************************************************************
+   /*! Creates a multigrid V-cycle with a fixed stencil
+    * \param blocks the block storage where the fields are stored
+    * \param uFieldId the block data id of the solution field on the finest level
+    * \param fFieldId the block data id of the right-hand side field on the finest level
+    * \param weights vector of stencil weights for the discrete operator
+    * \param iterations maximum number of V-cycles to perform
+    * \param numLvl number of grid levels to use (including the finest level)
+    * \param preSmoothingIters number of Gauss-Seidel iterations before restriction
+    * \param postSmoothingIters number of Gauss-Seidel iterations after prolongation
+    * \param coarseIters number of Conjugate Gradient iterations on coarsest grid
+    * \param residualNorm function that returns the norm of the current residuum
+    * \param residualNormThreshold norm threshold below which the iteration is terminated
+    * \param residualCheckFrequency how often to check whether the threshold has been reached
+    *******************************************************************************************************************/
    VCycles( shared_ptr< StructuredBlockForest > blocks, const BlockDataID & uFieldId, const BlockDataID & fFieldId,
             const Weight_T weights,
             const uint_t iterations, const uint_t numLvl,
@@ -59,8 +89,25 @@ public:
             const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
             const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() );
 
+   //*******************************************************************************************************************
+   /*! Creates a multigrid V-cycle with a stencil field
+    * \param blocks the block storage where the fields are stored
+    * \param uFieldId the block data id of the solution field on the finest level
+    * \param fFieldId the block data id of the right-hand side field on the finest level
+    * \param stencilFieldId the block data id of the stencil field for the finest level.
+    *                       The values stored in the field must not change after this class has been constructed.
+    * \param operatorCoarsening function that performs the stencil coarsening
+    * \param iterations maximum number of V-cycles to perform
+    * \param numLvl number of grid levels to use (including the finest level)
+    * \param preSmoothingIters number of Gauss-Seidel iterations before restriction
+    * \param postSmoothingIters number of Gauss-Seidel iterations after prolongation
+    * \param coarseIters number of Conjugate Gradient iterations on coarsest grid
+    * \param residualNorm function that returns the norm of the current residuum
+    * \param residualNormThreshold norm threshold below which the iteration is terminated
+    * \param residualCheckFrequency how often to check whether the threshold has been reached
+    *******************************************************************************************************************/
    VCycles( shared_ptr< StructuredBlockForest > blocks, const BlockDataID & uFieldId, const BlockDataID & fFieldId,
-            const BlockDataID & stencilFieldId,
+            const BlockDataID & stencilFieldId, const OperatorCoarsening_T & operatorCoarsening,
             const uint_t iterations, const uint_t numLvl,
             const uint_t preSmoothingIters, const uint_t postSmoothingIters,
             const uint_t coarseIters, const std::function< real_t () > & residualNorm,
@@ -76,9 +123,7 @@ public:
    const std::vector<real_t> & convergenceRate() { return convergenceRate_; }
    static Vector3<uint_t> getSizeForLevel( const uint_t level, const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block );
 
-protected:
-
-   void coarsenStencilFields();
+private:
 
    StructuredBlockForest & blocks_;
    std::vector< Weight_T  > weights_;
diff --git a/src/pde/iterations/VCycles.impl.h b/src/pde/iterations/VCycles.impl.h
index a35cf35779f8dd3396a6792a4fedeaad7e7957e3..f202d89e20b45e8c3cd26fb9398b0acbff6fb1cc 100644
--- a/src/pde/iterations/VCycles.impl.h
+++ b/src/pde/iterations/VCycles.impl.h
@@ -39,10 +39,10 @@
 namespace walberla {
 namespace pde {
 
-template< typename Stencil_T >
-VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const BlockDataID & uFieldId, const BlockDataID & fFieldId,
-        const Weight_T weights,
-        const uint_t iterations, const uint_t numLvl,
+template< typename Stencil_T, typename OperatorCoarsening_T, typename Restrict_T, typename ProlongateAndCorrect_T >
+VCycles< Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T >::VCycles(
+		shared_ptr< StructuredBlockForest > blocks, const BlockDataID & uFieldId, const BlockDataID & fFieldId,
+        const Weight_T weights, const uint_t iterations, const uint_t numLvl,
         const uint_t preSmoothingIters, const uint_t postSmoothingIters,
         const uint_t coarseIters, const std::function< real_t () > & residualNorm,
         const real_t residualNormThreshold, const uint_t residualCheckFrequency,
@@ -54,6 +54,9 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
    iterationsPerformed_( uint_t(0) ), thresholdReached_( false ), residualNorm_( residualNorm ), convergenceRate_(), stencilId_(),
    requiredSelectors_( requiredSelectors ), incompatibleSelectors_( incompatibleSelectors )
 {
+
+   static_assert(std::is_same<OperatorCoarsening_T, CoarsenStencilFieldsDCA<Stencil_T>>::value, "Use of weight requires DCA, use constructor with stencil field if you want to employ GCA");
+
    // Set up fields for finest level
    uId_.push_back( uFieldId );
    fId_.push_back( fFieldId );
@@ -87,7 +90,7 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
    // Set up fields for coarser levels
    for ( uint_t lvl = 1; lvl < numLvl; ++lvl )
    {
-      auto getSize = std::bind(VCycles<Stencil_T>::getSizeForLevel, lvl, std::placeholders::_1, std::placeholders::_2);
+      auto getSize = std::bind(VCycles<Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T>::getSizeForLevel, lvl, std::placeholders::_1, std::placeholders::_2);
       uId_.push_back( field::addToStorage< PdeField_T >( blocks, "u_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
       fId_.push_back( field::addToStorage< PdeField_T >( blocks, "f_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
       rId_.push_back( field::addToStorage< PdeField_T >( blocks, "r_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
@@ -100,7 +103,7 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
    }
 
    // Set up fields for CG on coarsest level
-   auto getFineSize = std::bind(VCycles<Stencil_T>::getSizeForLevel, numLvl-1, std::placeholders::_1, std::placeholders::_2);
+   auto getFineSize = std::bind(VCycles<Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T>::getSizeForLevel, numLvl-1, std::placeholders::_1, std::placeholders::_2);
    dId_ = field::addToStorage< PdeField_T >( blocks, "d", getFineSize, real_t(0), field::zyxf, uint_t(1) );
    zId_ = field::addToStorage< PdeField_T >( blocks, "z", getFineSize, real_t(0), field::zyxf, uint_t(1) );
 
@@ -132,9 +135,9 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
    for (uint_t lvl = 0; lvl < numLvl-1; ++lvl)
    {
       computeResidual_.push_back( ComputeResidualFixedStencil< Stencil_T >( blocks, uId_[lvl], fId_[lvl], weights_[lvl], rId_[lvl] ) );
-      restrict_.push_back( Restrict< Stencil_T >(blocks, rId_[lvl], fId_[lvl+1] ) );
+      restrict_.push_back( Restrict_T(blocks, rId_[lvl], fId_[lvl+1] ) );
       zeroize_.push_back( Zeroize(blocks, uId_[lvl+1]) );
-      prolongateAndCorrect_.push_back( ProlongateAndCorrect< Stencil_T >(blocks, uId_[lvl+1], uId_[lvl]) );
+      prolongateAndCorrect_.push_back( ProlongateAndCorrect_T(blocks, uId_[lvl+1], uId_[lvl]) );
    }
 
    // Set up CG coarse-grid iteration
@@ -145,9 +148,10 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
 
 
 
-template< typename Stencil_T >
-VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const BlockDataID & uFieldId, const BlockDataID & fFieldId,
-        const BlockDataID & stencilFieldId,
+template< typename Stencil_T, typename OperatorCoarsening_T, typename Restrict_T, typename ProlongateAndCorrect_T >
+VCycles< Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T >::VCycles(
+		shared_ptr< StructuredBlockForest > blocks, const BlockDataID & uFieldId, const BlockDataID & fFieldId,
+        const BlockDataID & stencilFieldId, const OperatorCoarsening_T & operatorCoarsening,
         const uint_t iterations, const uint_t numLvl,
         const uint_t preSmoothingIters, const uint_t postSmoothingIters,
         const uint_t coarseIters, const std::function< real_t () > & residualNorm,
@@ -192,17 +196,19 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
    // Set up fields for coarser levels
    for ( uint_t lvl = 1; lvl < numLvl; ++lvl )
    {
-      auto getSize = std::bind(VCycles<Stencil_T>::getSizeForLevel, lvl, std::placeholders::_1, std::placeholders::_2);
+      auto getSize = std::bind(VCycles<Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T>::getSizeForLevel, lvl, std::placeholders::_1, std::placeholders::_2);
       uId_.push_back( field::addToStorage< PdeField_T >( blocks, "u_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
       fId_.push_back( field::addToStorage< PdeField_T >( blocks, "f_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
       rId_.push_back( field::addToStorage< PdeField_T >( blocks, "r_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
       stencilId_.push_back( field::addToStorage< StencilField_T >( blocks, "w_"+boost::lexical_cast<std::string>(lvl), getSize, real_t(0), field::zyxf, uint_t(1) ) );
    }
 
-   coarsenStencilFields(); // scaling by ( 1/h^2 )^lvl
+   // CoarsenStencilFieldsDCA<Stencil_T>( blocks, stencilId_, numLvl, uint_t(2)) ();  // scaling by ( 1/h^2 )^lvl
+   // CoarsenStencilFieldsGCA<Stencil_T>( blocks, stencilId_, numLvl, real_t(2)) ();
+   operatorCoarsening(stencilId_);
 
    // Set up fields for CG on coarsest level
-   auto getFineSize = std::bind(VCycles<Stencil_T>::getSizeForLevel, numLvl-1, std::placeholders::_1, std::placeholders::_2);
+   auto getFineSize = std::bind(VCycles<Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T>::getSizeForLevel, numLvl-1, std::placeholders::_1, std::placeholders::_2);
    dId_ = field::addToStorage< PdeField_T >( blocks, "d", getFineSize, real_t(0), field::zyxf, uint_t(1) );
    zId_ = field::addToStorage< PdeField_T >( blocks, "z", getFineSize, real_t(0), field::zyxf, uint_t(1) );
 
@@ -234,9 +240,9 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
    for (uint_t lvl = 0; lvl < numLvl-1; ++lvl)
    {
       computeResidual_.push_back( ComputeResidual< Stencil_T >( blocks, uId_[lvl], fId_[lvl], stencilId_[lvl], rId_[lvl] ) );
-      restrict_.push_back( Restrict< Stencil_T >(blocks, rId_[lvl], fId_[lvl+1] ) );
+      restrict_.push_back( Restrict_T(blocks, rId_[lvl], fId_[lvl+1] ) );
       zeroize_.push_back( Zeroize(blocks, uId_[lvl+1]) );
-      prolongateAndCorrect_.push_back( ProlongateAndCorrect< Stencil_T >(blocks, uId_[lvl+1], uId_[lvl]) );
+      prolongateAndCorrect_.push_back( ProlongateAndCorrect_T(blocks, uId_[lvl+1], uId_[lvl]) );
    }
 
    // Set up CG coarse-grid iteration
@@ -247,8 +253,8 @@ VCycles< Stencil_T >::VCycles( shared_ptr< StructuredBlockForest > blocks, const
 
 
 
-template< typename Stencil_T >
-void VCycles< Stencil_T >::operator()()
+template< typename Stencil_T, typename OperatorCoarsening_T, typename Restrict_T, typename ProlongateAndCorrect_T >
+void VCycles< Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T >::operator()()
 {
    WALBERLA_LOG_PROGRESS_ON_ROOT( "Starting VCycle iteration with a maximum number of " << iterations_ << " cycles and " << numLvl_ << " levels" );
    thresholdReached_ = false;
@@ -300,32 +306,8 @@ void VCycles< Stencil_T >::operator()()
 
 
 
-template< typename Stencil_T >
-void VCycles< Stencil_T >::coarsenStencilFields()
-{
-   const real_t scalingFactor = real_t(0.25); // scaling by ( 1/h^2 )^lvl
-
-   WALBERLA_ASSERT_EQUAL(numLvl_, stencilId_.size(), "This function can only be called when operating with stencil fields!");
-
-   for ( uint_t lvl = 1; lvl < numLvl_; ++lvl )
-   {
-      for( auto block = blocks_.begin( requiredSelectors_, incompatibleSelectors_ ); block != blocks_.end(); ++block )
-      {
-         StencilField_T * fine   = block->template getData< StencilField_T >( stencilId_[lvl-1] );
-         StencilField_T * coarse = block->template getData< StencilField_T >( stencilId_[lvl] );
-         
-         WALBERLA_FOR_ALL_CELLS_XYZ(coarse,
-            for( auto dir = Stencil_T::begin(); dir != Stencil_T::end(); ++dir )
-               coarse->get(x,y,z, dir.toIdx()) = scalingFactor * fine->get(x,y,z, dir.toIdx());
-         )
-      }
-   }
-}
-
-
-
-template< typename Stencil_T >
-void VCycles< Stencil_T >::VCycle()
+template< typename Stencil_T, typename OperatorCoarsening_T, typename Restrict_T, typename ProlongateAndCorrect_T >
+void VCycles< Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T >::VCycle()
 {
    // pre-smoothen -- go from fine to coarse
    for (uint_t l = 0; l < numLvl_-1; ++l)
@@ -376,8 +358,8 @@ void VCycles< Stencil_T >::VCycle()
 
 
 
-template< typename Stencil_T >
-Vector3<uint_t> VCycles< Stencil_T >::getSizeForLevel( const uint_t level, const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block )
+template< typename Stencil_T, typename OperatorCoarsening_T, typename Restrict_T, typename ProlongateAndCorrect_T >
+Vector3<uint_t> VCycles< Stencil_T, OperatorCoarsening_T, Restrict_T, ProlongateAndCorrect_T >::getSizeForLevel( const uint_t level, const shared_ptr< StructuredBlockStorage > & blocks, IBlock * const block )
 {
    Vector3<uint_t> cells( blocks->getNumberOfXCells( *block ), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ) );
 
diff --git a/src/pde/sweeps/Multigrid.h b/src/pde/sweeps/Multigrid.h
index 2398790832454c2f90eb91a7098c2ce8f678afaa..58d4d8f031afa649526cc3814b1115e9b8d6992b 100644
--- a/src/pde/sweeps/Multigrid.h
+++ b/src/pde/sweeps/Multigrid.h
@@ -31,6 +31,13 @@ namespace pde {
 
 
 
+//**********************************************************************************************************************
+/*!
+ *   \brief Restriction sweep for multigrid
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ */
+//**********************************************************************************************************************
 template< typename Stencil_T >
 class Restrict
 {
@@ -38,6 +45,11 @@ public:
 
    typedef GhostLayerField< real_t, 1 > Field_T;
 
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the fields are stored
+    * \param fineFieldId the block data id of the fine field
+    * \param coarseFieldId the block data id of the coarse field
+    *******************************************************************************************************************/
    Restrict( const shared_ptr< domain_decomposition::StructuredBlockStorage > & blocks,
                 const BlockDataID & fineFieldId, const BlockDataID & coarseFieldId ) :
       blocks_( blocks ), fineFieldId_( fineFieldId ), coarseFieldId_( coarseFieldId ) {}
@@ -53,6 +65,13 @@ private:
 
 
 
+//**********************************************************************************************************************
+/*!
+ *   \brief Prolongation and correction sweep for multigrid
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ */
+//**********************************************************************************************************************
 template< typename Stencil_T >
 class ProlongateAndCorrect
 {
@@ -60,6 +79,11 @@ public:
 
    typedef GhostLayerField< real_t, 1 > Field_T;
 
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the fields are stored
+    * \param coarseFieldId the block data id of the coarse field
+    * \param fineFieldId the block data id of the fine field
+    *******************************************************************************************************************/
    ProlongateAndCorrect( const shared_ptr< domain_decomposition::StructuredBlockStorage > & blocks,
                  const BlockDataID & coarseFieldId, const BlockDataID & fineFieldId ) :
    blocks_( blocks ), fineFieldId_( fineFieldId ), coarseFieldId_( coarseFieldId ) {}
@@ -75,6 +99,13 @@ private:
 
 
 
+//**********************************************************************************************************************
+/*!
+ *   \brief Residual calculation sweep for multigrid with stencil field
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ */
+//**********************************************************************************************************************
 template< typename Stencil_T >
 class ComputeResidual
 {
@@ -83,6 +114,13 @@ public:
    typedef GhostLayerField< real_t, 1 >                Field_T;
    typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
 
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the fields are stored
+    * \param uId the block data id of the solution field
+    * \param fId the block data id of the right-hand side field
+    * \param stencilId the block data id of the stencil field
+    * \param rId the block data id of the residual field
+    *******************************************************************************************************************/
    ComputeResidual( const shared_ptr< domain_decomposition::StructuredBlockStorage > & blocks, const BlockDataID & uId,
                     const BlockDataID & fId, const BlockDataID & stencilId, const BlockDataID & rId )
             : blocks_( blocks ), uId_( uId ), fId_( fId ), stencilId_( stencilId ), rId_( rId )
@@ -101,6 +139,13 @@ private:
 
 
 
+//**********************************************************************************************************************
+/*!
+ *   \brief Residual calculation sweep for multigrid with fixed stencil weights
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ */
+//**********************************************************************************************************************
 template< typename Stencil_T >
 class ComputeResidualFixedStencil
 {
@@ -109,6 +154,13 @@ public:
    typedef GhostLayerField< real_t, 1 > Field_T;
    typedef std::vector< real_t  > Weight_T;
 
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the fields are stored
+    * \param uId the block data id of the solution field
+    * \param fId the block data id of the right-hand side field
+    * \param weights vector of stencil weights for the discrete operator
+    * \param rId the block data id of the residual field
+    *******************************************************************************************************************/
    ComputeResidualFixedStencil( const shared_ptr< domain_decomposition::StructuredBlockStorage > & blocks, const BlockDataID & uId,
                                 const BlockDataID & fId, const std::vector <real_t> & weights, const BlockDataID & rId )
             : blocks_( blocks ), uId_( uId ), fId_( fId ), weights_( weights ), rId_( rId )
@@ -127,12 +179,21 @@ private:
 
 
 
+//**********************************************************************************************************************
+/*!
+ *   \brief Sweep that sets all values in a field to zero
+ */
+//**********************************************************************************************************************
 class Zeroize
 {
 public:
 
    typedef GhostLayerField< real_t, 1 > Field_T;
 
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the field is stored
+    * \param fieldId the block data id of the field
+    *******************************************************************************************************************/
    Zeroize( const shared_ptr< domain_decomposition::StructuredBlockStorage > & blocks, const BlockDataID & fieldId )
             : blocks_( blocks ), fieldId_( fieldId )
    { }
@@ -150,6 +211,97 @@ private:
 
 
 
+//**********************************************************************************************************************
+/*!
+ *   \brief Direct Coarsening Approach for the stencil field
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ */
+//**********************************************************************************************************************
+template< typename Stencil_T >
+class CoarsenStencilFieldsDCA
+{
+public:
+
+   typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
+
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the fields are stored
+    * \param numLvl number of grid levels to use (including the finest level)
+    * \param operatorOrder the order of the (continuum) differential operator, e.g. 2 for Laplace
+    *******************************************************************************************************************/
+   CoarsenStencilFieldsDCA( shared_ptr< StructuredBlockForest > blocks,
+                      const uint_t numLvl, const uint_t operatorOrder,
+                      const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
+                      const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() )
+         : blocks_( blocks ), numLvl_(numLvl), operatorOrder_(operatorOrder),
+           requiredSelectors_( requiredSelectors ), incompatibleSelectors_( incompatibleSelectors )
+   { }
+
+   //*******************************************************************************************************************
+   /* \param stencilFieldId a vector of the block data ids of the stencil field for all levels (finest first)
+    *******************************************************************************************************************/
+   void operator()(const std::vector<BlockDataID> & stencilFieldId) const;
+
+private:
+
+   shared_ptr< domain_decomposition::StructuredBlockStorage > blocks_;
+
+   uint_t numLvl_;
+   uint_t operatorOrder_;
+
+   Set< SUID > requiredSelectors_;
+   Set< SUID > incompatibleSelectors_;
+
+};
+
+
+
+//**********************************************************************************************************************
+/*!
+ *   \brief Galerkin Coarsening Approach for the stencil field
+ *
+ *   \tparam Stencil_T The stencil used for the discrete operator
+ */
+//**********************************************************************************************************************
+template< typename Stencil_T >
+class CoarsenStencilFieldsGCA
+{
+public:
+
+   typedef GhostLayerField< real_t, Stencil_T::Size >  StencilField_T;
+
+   //*******************************************************************************************************************
+   /* \param blocks the block storage where the fields are stored
+    * \param numLvl number of grid levels to use (including the finest level)
+    * \param overrelaxFact overrelaxation factor, e.g. 2 for the Poisson equation
+    *******************************************************************************************************************/
+   CoarsenStencilFieldsGCA( shared_ptr< StructuredBlockForest > blocks,
+                      const uint_t numLvl, const real_t overrelaxFact = real_t(1),
+                      const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
+                      const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() )
+         : blocks_( blocks ), numLvl_(numLvl), overrelaxFact_(overrelaxFact),
+           requiredSelectors_( requiredSelectors ), incompatibleSelectors_( incompatibleSelectors )
+   { }
+
+   //*******************************************************************************************************************
+   /* \param stencilFieldId a vector of the block data ids of the stencil field for all levels (finest first)
+    *******************************************************************************************************************/
+   void operator()(const std::vector<BlockDataID> & stencilFieldId) const;
+
+private:
+
+   shared_ptr< domain_decomposition::StructuredBlockStorage > blocks_;
+
+   uint_t numLvl_;
+   real_t overrelaxFact_;
+
+   Set< SUID > requiredSelectors_;
+   Set< SUID > incompatibleSelectors_;
+
+};
+
+
 } // namespace pde
 } // namespace walberla
 
diff --git a/src/pde/sweeps/Multigrid.impl.h b/src/pde/sweeps/Multigrid.impl.h
index e83cf18b7f1ff496cefcf0496c62a42896f221a6..e4b5decf62bcd597fc7ef11eea707d1b3f85f42a 100644
--- a/src/pde/sweeps/Multigrid.impl.h
+++ b/src/pde/sweeps/Multigrid.impl.h
@@ -24,6 +24,19 @@
 
 #include "Multigrid.h"
 
+#include "stencil/D3Q7.h"
+
+#ifdef _MSC_VER
+#  pragma warning(push)
+// disable warning for multi_array: "declaration of 'extents' hides global declaration"
+#  pragma warning( disable : 4459 )
+#endif //_MSC_VER
+
+#include <boost/multi_array.hpp>
+
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif //_MSC_VER
 
 
 namespace walberla {
@@ -53,6 +66,7 @@ void Restrict< Stencil_T >::operator()( IBlock * const block ) const
       else
       {
          WALBERLA_ASSERT_EQUAL(z, 0);
+         coarse->get(x,y,z) = real_t(0);
       }
 
       coarse->get(x,y,z) +=   fine->get(fx  , fy  , fz  ) + fine->get(fx+1, fy  , fz  )
@@ -140,5 +154,120 @@ void ComputeResidualFixedStencil< Stencil_T >::operator()( IBlock * const block
 
 
 
+template< typename Stencil_T >
+void CoarsenStencilFieldsDCA<Stencil_T >::operator()( const std::vector<BlockDataID> & stencilFieldId ) const
+{
+   const real_t scalingFactor = real_t(1)/real_c(2<< (operatorOrder_-1) ); // scaling by ( 1/h^operatorOrder )^lvl
+
+   WALBERLA_ASSERT_EQUAL(numLvl_, stencilFieldId.size(), "This function can only be called when operating with stencil fields!");
+
+   for ( uint_t lvl = 1; lvl < numLvl_; ++lvl )
+   {
+      for( auto block = blocks_->begin( requiredSelectors_, incompatibleSelectors_ ); block != blocks_->end(); ++block )
+      {
+         StencilField_T * fine   = block->template getData< StencilField_T >( stencilFieldId[lvl-1] );
+         StencilField_T * coarse = block->template getData< StencilField_T >( stencilFieldId[lvl] );
+
+         WALBERLA_FOR_ALL_CELLS_XYZ(coarse,
+            for( auto dir = Stencil_T::begin(); dir != Stencil_T::end(); ++dir )
+               coarse->get(x,y,z, dir.toIdx()) = scalingFactor * fine->get(x,y,z, dir.toIdx());
+         )
+      }
+   }
+}
+
+
+
+template< >
+void CoarsenStencilFieldsGCA< stencil::D3Q7 >::operator()( const std::vector<BlockDataID> & stencilFieldId ) const
+{
+
+   WALBERLA_ASSERT_EQUAL(numLvl_, stencilFieldId.size(), "This function can only be called when operating with stencil fields!");
+
+   // Apply Galerkin coarsening to each level //
+   // currently only implemented for CCMG with constant restriction and prolongation
+
+   for ( uint_t lvl = 1; lvl < numLvl_; ++lvl )
+   {
+      for( auto block = blocks_->begin( requiredSelectors_, incompatibleSelectors_ ); block != blocks_->end(); ++block )
+      {
+         StencilField_T * fine   = block->getData< StencilField_T >( stencilFieldId[lvl-1] );
+         StencilField_T * coarse = block->getData< StencilField_T >( stencilFieldId[lvl] );
+
+
+         typedef boost::multi_array<real_t, 3> Array3D;
+         Array3D p(boost::extents[7][7][7]);
+         Array3D r(boost::extents[2][2][2]);
+
+         // Set to restriction weights from constant restrict operator
+          for(Array3D::index z=0; z<2; ++z) {
+            for(Array3D::index y=0; y<2; ++y) {
+               for(Array3D::index x=0; x<2; ++x) {
+                  r[x][y][z] = real_t(1);
+               }
+            }
+         }
+
+         // Init to 0 //
+         for(Array3D::index k=0; k<7; ++k) {
+            for(Array3D::index j=0; j<7; ++j) {
+               for(Array3D::index i=0; i<7; ++i) {
+                  p[i][j][k] = real_t(0);
+               }
+            }
+         }
+
+         // Set center to prolongation weights, including overrelaxation factor (latter therefore no longer needed in ProlongateAndCorrect)
+         for(Array3D::index k=0; k<2; ++k) {
+            for(Array3D::index j=0; j<2; ++j) {
+               for(Array3D::index i=0; i<2; ++i) {
+                  p[i+2][j+2][k+2] = real_t(0.125)/overrelaxFact_;   // Factor 0.125 such that same prolongation operator for DCA and GCA
+               }
+            }
+         }
+
+
+         WALBERLA_FOR_ALL_CELLS_XYZ(coarse,
+
+            Array3D ap(boost::extents[7][7][7]);
+
+            const cell_idx_t fx = 2*x;
+            const cell_idx_t fy = 2*y;
+            const cell_idx_t fz = 2*z;
+
+            for(Array3D::index k=0; k<7; ++k)
+               for(Array3D::index j=0; j<7; ++j)
+                  for(Array3D::index i=0; i<7; ++i)
+                     ap[i][j][k] = real_t(0);
+
+
+            // Tested for spatially varying stencils! //
+            for(Array3D::index k=1; k<5; ++k)
+               for(Array3D::index j=1; j<5; ++j)
+                  for(Array3D::index i=1; i<5; ++i) {
+                     ap[i][j][k] = real_t(0);
+                     for(auto d = stencil::D3Q7::begin(); d != stencil::D3Q7::end(); ++d ){
+                        ap[i][j][k] += p[ i+d.cx() ] [ j+d.cy() ] [k+d.cz() ] * fine->get( fx+cell_idx_c(i%2), fy+cell_idx_c(j%2), fz+cell_idx_c(k%2), d.toIdx() ); // contains elements of one row of coarse grid matrix
+                     }
+                  }
+
+            // Checked, correct! //
+            for(auto d = stencil::D3Q7::begin(); d != stencil::D3Q7::end(); ++d ){
+               real_t sum = 0;
+               for(Array3D::index k=0; k<2; ++k)
+                  for(Array3D::index j=0; j<2; ++j)
+                     for(Array3D::index i=0; i<2; ++i) {
+                        sum += ap[i+2-2*d.cx()] [j+2-2*d.cy()] [k+2-2*d.cz()] * r[i][j][k];
+                        // either i+0 or i+4    // either j+0 or j+4    // either k+0 or k+4       // always 1 here
+                     }
+
+               coarse->get(x,y,z,*d) = sum;
+            }
+         )
+      }
+   }
+}
+
+
 } // namespace pde
 } // namespace walberla
diff --git a/tests/pde/MGTest.cpp b/tests/pde/MGTest.cpp
index e673a61090673c62fff051a88fa2d7fa9fa5a9dc..fa08a2bf8a08a5b6df3006fe842c032a90d38f43 100644
--- a/tests/pde/MGTest.cpp
+++ b/tests/pde/MGTest.cpp
@@ -160,6 +160,8 @@ int main( int argc, char** argv )
    for( int i = 1; i < argc; ++i )
       if( std::strcmp( argv[i], "--shortrun" ) == 0 ) shortrun = true;
 
+   const uint_t numLvl = shortrun ? uint_t(3) : uint_t(5);
+
    const uint_t xBlocks = ( processes == uint_t(1) ) ? uint_t(1) : uint_t(2);
    const uint_t yBlocks = ( processes == uint_t(1) ) ? uint_t(1) : uint_t(2);
    const uint_t zBlocks = ( processes == uint_t(1) ) ? uint_t(1) : uint_t(2);
@@ -201,19 +203,19 @@ int main( int argc, char** argv )
    weights[ Stencil_T::idx[ stencil::T ] ] = real_t(-1) / ( blocks->dx() * blocks->dz() );
    weights[ Stencil_T::idx[ stencil::B ] ] = real_t(-1) / ( blocks->dx() * blocks->dz() );
 
-   auto solver = walberla::make_shared<pde::VCycles< Stencil_T > >( blocks, uId, fId, weights,
-                                                                    shortrun ? uint_t(1) : uint_t(20),                                              // iterations
-                                                                    shortrun ? uint_t(3) : uint_t(5),                                               // levels
-                                                                    3, 3, 10,                                                                       // pre-smoothing, post-smoothing, coarse-grid iterations
-                                                                    pde::ResidualNorm< Stencil_T >( blocks->getBlockStorage(), uId, fId, weights ), // residual norm functor
-                                                                    real_c(1e-12) );                                                                // target precision
-   timeloop.addFuncBeforeTimeStep( makeSharedFunctor(solver), "Cell-centered multigrid V-cycles" );
+   auto solverDCA = walberla::make_shared<pde::VCycles< Stencil_T > >( blocks, uId, fId, weights,
+                                                                       shortrun ? uint_t(1) : uint_t(20),                                              // iterations
+                                                                       numLvl,                                               							// levels
+                                                                       3, 3, 10,                                                                       // pre-smoothing, post-smoothing, coarse-grid iterations
+                                                                       pde::ResidualNorm< Stencil_T >( blocks->getBlockStorage(), uId, fId, weights ), // residual norm functor
+                                                                       real_c(1e-12) );                                                                // target precision
+   timeloop.addFuncBeforeTimeStep( makeSharedFunctor(solverDCA), "Cell-centered multigrid V-cycles with uniformly constant stencil" );
 
    timeloop.run();
 
    if( !shortrun )
    {
-      auto & convrate = solver->convergenceRate();
+      auto & convrate = solverDCA->convergenceRate();
       for (uint_t i = 1; i < convrate.size(); ++i)
       {
          WALBERLA_LOG_RESULT_ON_ROOT("Convergence rate in iteration " << i << ": " << convrate[i]);
@@ -224,7 +226,7 @@ int main( int argc, char** argv )
       field::createVTKOutput< PdeField_T >( uId, *blocks, "solution" )();
    }
 
-   // rerun the test with a stencil field
+   // rerun the test with a stencil field and DCA
 
    clearField<PdeField_T>( blocks, uId);
    initU( blocks, uId );
@@ -235,19 +237,52 @@ int main( int argc, char** argv )
 
    copyWeightsToStencilField( blocks, weights, stencilId );
 
-   solver = walberla::make_shared<pde::VCycles< Stencil_T > >( blocks, uId, fId, stencilId,
-                                                              shortrun ? uint_t(1) : uint_t(20),                                              // iterations
-                                                              shortrun ? uint_t(3) : uint_t(5),                                               // levels
+   pde::CoarsenStencilFieldsDCA<Stencil_T>  coarsenWithDCA( blocks, numLvl, uint_t(2));		// Set up DCA object with operator order 2 (Laplace)
+
+   solverDCA = walberla::make_shared<pde::VCycles< Stencil_T, decltype(coarsenWithDCA) > >(
+		   	   	   	   	   	   	   	   	   	   	   	   	   	  blocks, uId, fId, stencilId, coarsenWithDCA,
+                                                              shortrun ? uint_t(1) : uint_t(20),                                              	// iterations
+                                                              numLvl,                                               							// levels
+                                                              3, 3, 10,                                                                       	// pre-smoothing, post-smoothing, coarse-grid iterations
+                                                              pde::ResidualNormStencilField< Stencil_T >( blocks->getBlockStorage(), uId, fId, stencilId ), // residual norm functor
+                                                              real_c(1e-12) );                                                                	// target precision
+   timeloop2.addFuncBeforeTimeStep( makeSharedFunctor(solverDCA), "Cell-centered multigrid V-cycles with stencil field and direct coarsening " );
+
+   timeloop2.run();
+
+   if( !shortrun )
+   {
+      auto & convrate = solverDCA->convergenceRate();
+      for (uint_t i = 1; i < convrate.size(); ++i)
+      {
+         WALBERLA_LOG_RESULT_ON_ROOT("Convergence rate in iteration " << i << ": " << convrate[i]);
+         WALBERLA_CHECK_LESS(convrate[i], real_t(0.1));
+      }
+   }
+
+   // rerun the test with a stencil field and GCA
+
+   clearField<PdeField_T>( blocks, uId);
+   initU( blocks, uId );
+
+   SweepTimeloop timeloop3( blocks, uint_t(1) );
+
+   pde::CoarsenStencilFieldsGCA<Stencil_T>  coarsenWithGCA( blocks, numLvl, real_t(2));		// Set up GCA object with overrelaxation factor 2 (only valid for Poisson equation)
+
+   auto solverGCA = walberla::make_shared<pde::VCycles< Stencil_T, decltype(coarsenWithGCA) > >(
+		                                                      blocks, uId, fId, stencilId, coarsenWithGCA,
+                                                              shortrun ? uint_t(1) : uint_t(20),                                            // iterations
+                                                              numLvl,                                               						// levels
                                                               3, 3, 10,                                                                       // pre-smoothing, post-smoothing, coarse-grid iterations
                                                               pde::ResidualNormStencilField< Stencil_T >( blocks->getBlockStorage(), uId, fId, stencilId ), // residual norm functor
                                                               real_c(1e-12) );                                                                // target precision
-   timeloop2.addFuncBeforeTimeStep( makeSharedFunctor(solver), "Cell-centered multigrid V-cycles" );
+   timeloop3.addFuncBeforeTimeStep( makeSharedFunctor(solverGCA), "Cell-centered multigrid V-cycles with stencil field and Galerkin coarsening " );
 
-   timeloop2.run();
+   timeloop3.run();
 
    if( !shortrun )
    {
-      auto & convrate = solver->convergenceRate();
+      auto & convrate = solverGCA->convergenceRate();
       for (uint_t i = 1; i < convrate.size(); ++i)
       {
          WALBERLA_LOG_RESULT_ON_ROOT("Convergence rate in iteration " << i << ": " << convrate[i]);