diff --git a/src/blockforest/loadbalancing/DynamicParMetis.cpp b/src/blockforest/loadbalancing/DynamicParMetis.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a612d47759d6e5425c9b699ef290cd1237387c7b
--- /dev/null
+++ b/src/blockforest/loadbalancing/DynamicParMetis.cpp
@@ -0,0 +1,328 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DynamicParMetis.cpp
+//! \ingroup blockforest
+//! \author Christian Godenschwager <christian.godenschwager@fau.de>
+//
+//======================================================================================================================
+
+#include "DynamicParMetis.h"
+
+#include "core/load_balancing/ParMetisWrapper.h"
+
+#include "core/logging/Logging.h"
+
+#include "core/mpi/BufferSystem.h"
+#include "core/mpi/MPIHelper.h"
+#include "core/mpi/MPIManager.h"
+#include "core/mpi/Gather.h"
+#include "core/mpi/Gatherv.h"
+
+#include "core/timing/Timer.h"
+
+#include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/trim.hpp>
+
+#include <iomanip>
+
+namespace walberla {
+namespace blockforest {
+
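+//! Computes the half-open range [begin, end) of global vertex indices assigned to the blocks of this process.
+//! The start of the range is obtained via an exclusive prefix sum over the per-process block counts.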
+std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phantomForest, MPI_Comm comm )
+{
+   const uint_t rank = uint_c(mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, MPIManager::instance()->rank()));
+
+   uint_t numLocalBlocks = phantomForest.getNumberOfBlocks();
+
+   uint_t sequenceStartOnProcess = uint_t( 0 );
+   MPI_Exscan( &numLocalBlocks, &sequenceStartOnProcess, 1, MPITrait<uint_t>::type(), MPI_SUM, comm );
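+   // MPI_Exscan leaves the receive buffer undefined on rank 0, so the start of the sequence is set explicitly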
+   if( rank == 0 )
+      sequenceStartOnProcess = uint_t( 0 );
+
+   return std::make_pair( sequenceStartOnProcess, sequenceStartOnProcess + numLocalBlocks );
+}
+
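+//! Maps the BlockID of every local block to its global vertex index and exchanges this mapping with all
+//! neighboring processes, so that adjacency lists can later be expressed in global vertex indices.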
+std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest & phantomForest, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm )
+{
+   std::map< blockforest::BlockID, uint_t > mapping;
+
+   const auto & blockMap = phantomForest.getBlockMap();
+   uint_t sequenceId = blockSequenceRange.first;
+   for( auto it = blockMap.begin(); it != blockMap.end(); ++it )
+      mapping.insert( std::make_pair( it->first, sequenceId++ ) );
+   WALBERLA_ASSERT_EQUAL( sequenceId, blockSequenceRange.second );
+
+   const std::vector<uint_t> neighborProcesses = phantomForest.getNeighboringProcesses();
+
+   mpi::BufferSystem bs( comm );
+
+   for( auto it = neighborProcesses.begin(); it != neighborProcesses.end(); ++it )
+      bs.sendBuffer( mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, int_c(*it)) ) << mapping;
+
+   bs.setReceiverInfoFromSendBufferState( false, true );
+
+   bs.sendAll();
+
+   for( auto it = bs.begin(); it != bs.end(); ++it )
+   {
+      std::map< blockforest::BlockID, uint_t > remoteMapping;
+      it.buffer() >> remoteMapping;
+
+      for( auto remoteIt = remoteMapping.begin(); remoteIt != remoteMapping.end(); ++remoteIt )
+      {
+         auto result = mapping.insert( *remoteIt );
+         WALBERLA_UNUSED( result );
+         WALBERLA_ASSERT( result.second, "BlockId should be unique!" );
+      }
+   }
+
+   return mapping;
+}
+
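+//! Returns a pointer to the first element of \p v or NULL if \p v is empty, since taking the address of
+//! v.front() on an empty vector would be undefined behavior.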
+template< typename T >
+T * ptr( std::vector<T> & v )
+{
+   if( v.empty() )
+      return NULL;
+   else
+      return &( v.front() );
+}
+
+typedef uint_t idx_t;
+
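+//! Computes a new target process for every local phantom block by partitioning the distributed block graph
+//! with the configured ParMetis algorithm. Returns false since no further load balancing iterations are required.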
+bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess,
+                                  std::set< uint_t > & processesToRecvFrom,
+                                  const PhantomBlockForest & phantomForest,
+                                  const uint_t /*iteration*/ ) const
+{
+   WcTimer globalTimer;
+   WcTimer parmetisTimer;
+   globalTimer.start();
+
+   // Create a new communicator that excludes all processes without blocks
+   MPI_Comm subComm;
+   MPI_Group allGroup, subGroup;
+   MPI_Comm_group( MPIManager::instance()->comm(), &allGroup );
+   std::vector<int> ranks;
+   if (phantomForest.getNumberOfBlocks() > 0)
+      ranks.push_back( MPIManager::instance()->rank() );
+   ranks = mpi::allGatherv( ranks, MPIManager::instance()->comm() );
+   auto numSubProcesses = ranks.size();
+   WALBERLA_UNUSED(numSubProcesses);
+   MPI_Group_incl(allGroup, int_c(ranks.size()), ptr(ranks), &subGroup);
+   MPI_Comm_create( MPIManager::instance()->comm(), subGroup, &subComm);
+
+   int64_t edgecut = 0;
+   std::vector<int64_t> part( phantomForest.getNumberOfBlocks(), int64_c( MPIManager::instance()->rank() ) );
+
+   if (subComm != MPI_COMM_NULL)
+   {
+      const std::pair<uint_t, uint_t> blockSequenceRange = getBlockSequenceRange( phantomForest, subComm );
+      const std::map< blockforest::BlockID, uint_t > mapping = getBlockIdToSequenceMapping( phantomForest, blockSequenceRange, subComm );
+
+      std::vector<int64_t> vtxdist = mpi::allGather( int64_c( blockSequenceRange.second ), subComm );
+      vtxdist.insert( vtxdist.begin(), int64_t( 0 ) );
+
+      std::vector<int64_t> adjncy, xadj, vsize, vwgt, adjwgt;
+      std::vector<double> xyz;
+
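+      // Assemble the local part of the graph in the distributed CSR format expected by ParMetis:
+      // xadj[i] .. xadj[i+1] delimits the slice of adjncy (and adjwgt) that belongs to local vertex i.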
+      for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it )
+      {
+         xadj.push_back( int64_c( adjncy.size() ) );
+         const PhantomBlock & block = *( it->first );
+         auto bi = block.getData< DynamicParMetisBlockInfo >();
+
+         switch( edgeSource_ )
+         {
+         case PARMETIS_EDGES_FROM_FOREST:
+            for( auto nit = block.getNeighborhood().begin(); nit != block.getNeighborhood().end(); ++nit )
+            {
+               auto mapIt = mapping.find( nit->getId() );
+               WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" );
+               adjncy.push_back( int64_c( mapIt->second ) );
+               auto edgeWeightIt = bi.getEdgeWeights().find( nit->getId() );
+               adjwgt.push_back( edgeWeightIt == bi.getEdgeWeights().end() ? int64_t( 0 ) : edgeWeightIt->second );
+            }
+            break;
+         case PARMETIS_EDGES_FROM_EDGE_WEIGHTS:
+            for( auto edgeIt = bi.getEdgeWeights().begin(); edgeIt != bi.getEdgeWeights().end(); ++edgeIt )
+            {
+               auto mapIt = mapping.find( edgeIt->first );
+               WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" );
+               adjncy.push_back( int64_c( mapIt->second ) );
+               adjwgt.push_back( edgeIt->second );
+            }
+            break;
+         }
+         vwgt.push_back( bi.getVertexWeight() );
+         vsize.push_back( bi.getVertexSize() );
+         xyz.push_back( bi.getVertexCoords()[0] );
+         xyz.push_back( bi.getVertexCoords()[1] );
+         xyz.push_back( bi.getVertexCoords()[2] );
+      }
+      xadj.push_back( int64_c( adjncy.size() ) );
+
+      WALBERLA_ASSERT_EQUAL( vtxdist.size(), numSubProcesses + uint_t( 1 ) );
+      WALBERLA_ASSERT_EQUAL( xadj.size(), phantomForest.getNumberOfBlocks() + 1 );
+      WALBERLA_ASSERT_EQUAL( vwgt.size(), phantomForest.getNumberOfBlocks() );
+      WALBERLA_ASSERT_EQUAL( vsize.size(), phantomForest.getNumberOfBlocks() );
+      WALBERLA_ASSERT_EQUAL( adjncy.size(), adjwgt.size() );
+
+      int64_t wgtflag = weightsToUse_;
+      int64_t numflag = 0; // C-style ordering
+      int64_t ncon = 1; // Number of constraints
+      int64_t ndims = 3; // Number of dimensions
+      double ubvec[] = { double( 1.05 ) }; // imbalance tolerance
+      int64_t nparts = int64_c( MPIManager::instance()->numProcesses() ); // number of subdomains
+      double ipc2redist = double( 1000000.0 ); // high values favor a low edge cut, low values (down to 0.000001) favor minimal data redistribution
+      MPI_Comm comm = subComm;
+      std::vector<double> tpwgts( uint_c(nparts * ncon), 1.0 / double_c( nparts ) ); // fraction of the total vertex weight assigned to each subdomain
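+      // options: [0] = 1 -> use the values given below, [1] = debug level, [2] = random seed; the fourth entry is only read by AdaptiveRepart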
+      int64_t options[] = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) };
+
+      int metisResult = core::METIS_OK;
+
+      switch( algorithm_ )
+      {
+      case PARMETIS_PART_GEOM_KWAY:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
+      case PARMETIS_PART_KWAY:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
+      case PARMETIS_ADAPTIVE_REPART:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_AdaptiveRepart( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( vsize ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, &ipc2redist, options, &edgecut, ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
+      case PARMETIS_REFINE_KWAY:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_RefineKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
+      }
+
+      if( metisResult != core::METIS_OK )
+         WALBERLA_ABORT("ParMetis failed!");
+   }
+
+   // Determine which processes will receive a block from this process
+   std::vector<uint8_t> isSendingBlockToProcess( uint_c(MPIManager::instance()->numProcesses()), uint8_t( 0 ) );
+   for( auto it = part.begin(); it != part.end(); ++it )
+   {
+      WALBERLA_ASSERT_GREATER_EQUAL( *it, 0 );
+      WALBERLA_ASSERT_LESS( *it, MPIManager::instance()->numProcesses() );
+      isSendingBlockToProcess[uint_c(*it)] = uint8_t( 1 );
+   }
+   isSendingBlockToProcess[uint_c(MPIManager::instance()->rank())] = uint8_t( 0 );
+
+   std::vector<uint8_t> isReceivingBlockFromProcess( uint_c(MPIManager::instance()->numProcesses()), uint8_t( 0 ) );
+   MPI_Alltoall( ptr( isSendingBlockToProcess ), 1, MPITrait<uint8_t>::type(), ptr( isReceivingBlockFromProcess ), 1, MPITrait<uint8_t>::type(), MPIManager::instance()->comm() );
+   for( uint_t i = 0; i < isReceivingBlockFromProcess.size(); ++i )
+      if( isReceivingBlockFromProcess[i] == uint8_t( 1 ) )
+         processesToRecvFrom.insert( i );
+
+   // assign target processes to blocks
+
+   uint_t i = 0;
+   for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it, ++i )
+   {
+      it->second = uint_c(part[i]);
+   }
+
+   globalTimer.end();
+   if (subComm != MPI_COMM_NULL)
+   {
+      int rank = -1;
+      MPI_Comm_rank( subComm, &rank);
+      if (rank == 0) WALBERLA_LOG_INFO("ParMetis finished successfully after " << globalTimer.last() << " s (ParMetis took " << parmetisTimer.last() << " s = " << std::setprecision(2) << parmetisTimer.last() / globalTimer.last() * 100.0 << "%) with an edge cut of " << edgecut );
+   }
+
+   MPI_Group_free(&allGroup);
+   MPI_Group_free(&subGroup);
+   if( subComm != MPI_COMM_NULL )
+      MPI_Comm_free(&subComm);
+
+   return false; // no further iterations
+}
+
+DynamicParMetis::Algorithm DynamicParMetis::stringToAlgorithm( std::string s )
+{
+   boost::algorithm::to_upper( s );
+   boost::algorithm::trim( s );
+
+   if( s == "PART_GEOM_KWAY" )
+      return PARMETIS_PART_GEOM_KWAY;
+   else if( s == "PART_KWAY" )
+      return PARMETIS_PART_KWAY;
+   else if( s == "PART_ADAPTIVE_REPART" )
+      return PARMETIS_ADAPTIVE_REPART;
+   else if( s == "REFINE_KWAY" )
+      return PARMETIS_REFINE_KWAY;
+   else
+      WALBERLA_ABORT( "Illegal ParMetis algorithm specified! Valid choices are: \"PART_GEOM_KWAY\", \"PART_KWAY\", \"PART_ADAPTIVE_REPART\", or \"REFINE_KWAY\"." );
+}
+
+
+DynamicParMetis::WeightsToUse DynamicParMetis::stringToWeightsToUse( std::string s )
+{
+   boost::algorithm::to_upper( s );
+   boost::algorithm::trim( s );
+
+   if( s == "NO_WEIGHTS" )
+      return PARMETIS_NO_WEIGHTS;
+   else if( s == "EDGE_WEIGHTS" )
+      return PARMETIS_EDGE_WEIGHTS;
+   else if( s == "VERTEX_WEIGHTS" )
+      return PARMETIS_VERTEX_WEIGHTS;
+   else if( s == "BOTH_WEIGHTS" )
+      return PARMETIS_BOTH_WEIGHTS;
+   else
+      WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"NO_WEIGHTS\", \"EDGE_WEIGHTS\", \"VERTEX_WEIGHTS\", or \"BOTH_WEIGHTS\"." );
+}
+
+
+DynamicParMetis::EdgeSource DynamicParMetis::stringToEdgeSource( std::string s )
+{
+   boost::algorithm::to_upper( s );
+   boost::algorithm::trim( s );
+
+   if( s == "EDGES_FROM_FOREST" )
+      return PARMETIS_EDGES_FROM_FOREST;
+   else if( s == "EDGES_FROM_EDGE_WEIGHTS" )
+      return PARMETIS_EDGES_FROM_EDGE_WEIGHTS;
+   else
+      WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"EDGES_FROM_FOREST\" or \"EDGES_FROM_EDGE_WEIGHTS\"" );
+}
+
+
+} // namespace blockforest
+} // namespace walberla
diff --git a/src/blockforest/loadbalancing/DynamicParMetis.h b/src/blockforest/loadbalancing/DynamicParMetis.h
new file mode 100644
index 0000000000000000000000000000000000000000..a39bc7c76238913d72dd3fb57d2e1dbe8dd2b298
--- /dev/null
+++ b/src/blockforest/loadbalancing/DynamicParMetis.h
@@ -0,0 +1,142 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DynamicParMetis.h
+//! \ingroup blockforest
+//! \author Christian Godenschwager <christian.godenschwager@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/PhantomBlockForest.h"
+
+#include "core/DataTypes.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/MPIWrapper.h"
+
+#include <boost/any.hpp>
+
+#include <map>
+
+namespace walberla {
+namespace blockforest {
+
+std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phantomForest, MPI_Comm comm );
+
+std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest & phantomForest, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm );
+
+
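+//! Dynamic load balancing callback that assigns phantom blocks to processes by partitioning the block graph with ParMetis.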
+class DynamicParMetis
+{
+public:
+   enum Algorithm    { PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY, PARMETIS_ADAPTIVE_REPART, PARMETIS_REFINE_KWAY };
+   enum WeightsToUse { PARMETIS_NO_WEIGHTS = 0, PARMETIS_EDGE_WEIGHTS = 1, PARMETIS_VERTEX_WEIGHTS = 2, PARMETIS_BOTH_WEIGHTS = 3 };
+   enum EdgeSource   { PARMETIS_EDGES_FROM_FOREST, PARMETIS_EDGES_FROM_EDGE_WEIGHTS };
+
+   DynamicParMetis( const Algorithm algorithm = PARMETIS_PART_GEOM_KWAY,
+                    const WeightsToUse weightsToUse = PARMETIS_BOTH_WEIGHTS,
+                    const EdgeSource edgeSource = PARMETIS_EDGES_FROM_EDGE_WEIGHTS )
+      : algorithm_( algorithm ), weightsToUse_( weightsToUse ), edgeSource_( edgeSource ) {}
+
+   bool operator()( std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess,
+                    std::set< uint_t > & processesToRecvFrom,
+                    const PhantomBlockForest & phantomForest,
+                    const uint_t iteration ) const;
+
+   bool edgeWeightsUsed()   const { return ( weightsToUse_ == PARMETIS_EDGE_WEIGHTS   ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); }
+   bool vertexWeightsUsed() const { return ( weightsToUse_ == PARMETIS_VERTEX_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); }
+   bool vertexSizeUsed()    const { return algorithm_ == PARMETIS_ADAPTIVE_REPART; }
+
+   static Algorithm    stringToAlgorithm( std::string s );
+   static WeightsToUse stringToWeightsToUse( std::string s );
+   static EdgeSource   stringToEdgeSource( std::string s );
+
+protected:
+   Algorithm algorithm_;
+   WeightsToUse weightsToUse_;
+   EdgeSource edgeSource_;
+};
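+
+// Minimal usage sketch (hypothetical configuration; registering the instance with the block forest's
+// refresh machinery happens elsewhere and is not shown here):
+//
+//   DynamicParMetis parmetis( DynamicParMetis::stringToAlgorithm( "PART_GEOM_KWAY" ),
+//                             DynamicParMetis::stringToWeightsToUse( "BOTH_WEIGHTS" ),
+//                             DynamicParMetis::stringToEdgeSource( "EDGES_FROM_EDGE_WEIGHTS" ) );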
+
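+//! Block data holding the ParMetis graph information of a single block: vertex weight, migration cost
+//! (vertex size), spatial coordinates, and the communication weights of the edges to neighboring blocks.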
+class DynamicParMetisBlockInfo
+{
+public:
+
+   typedef int64_t weight_t;
+   typedef int64_t vsize_t;
+
+   DynamicParMetisBlockInfo( const weight_t vertexWeight )
+      : vertexWeight_(vertexWeight), vertexSize_(1)
+   { }
+
+   DynamicParMetisBlockInfo( mpi::RecvBuffer & buffer )
+   {
+      buffer >> vertexWeight_ >> vertexSize_ >> vertexCoords_ >> edgeWeights_;
+   }
+
+   weight_t getVertexWeight() const { return vertexWeight_; }
+   void     setVertexWeight( const weight_t vertexWeight ) { vertexWeight_ = vertexWeight; }
+
+   vsize_t getVertexSize() const { return vertexSize_; }
+   void setVertexSize( const vsize_t size ) { vertexSize_ = size; }
+
+   const Vector3<double> & getVertexCoords() const { return vertexCoords_; }
+   void setVertexCoords( const Vector3<double> & p ) { vertexCoords_ = p; }
+
+   void setEdgeWeight( const blockforest::BlockID & blockId, const weight_t edgeWeight ) { edgeWeights_[blockId] = edgeWeight; }
+   void setEdgeWeights( const std::map< blockforest::BlockID, weight_t > & edgeWeights ) { edgeWeights_ = edgeWeights; }
+
+   const std::map< blockforest::BlockID, weight_t > & getEdgeWeights() const { return edgeWeights_; }
+
+   void toBuffer( mpi::SendBuffer & buffer ) const
+   {
+      buffer << vertexWeight_ << vertexSize_ << vertexCoords_ << edgeWeights_;
+   }
+
+private:
+
+   weight_t vertexWeight_; ///< Defines the weight of a block
+   vsize_t vertexSize_;    ///< Defines the cost of rebalancing a block
+   Vector3<double> vertexCoords_; ///< Defines where the block is located in space. Needed by some ParMetis algorithms
+   std::map< blockforest::BlockID, weight_t > edgeWeights_; ///< Defines the cost of communication with other blocks
+};
+
+
+struct DynamicParMetisBlockInfoPackUnpack
+{
+   void operator()( mpi::SendBuffer & buffer, const PhantomBlock & block )
+   {
+      block.getData< DynamicParMetisBlockInfo >().toBuffer( buffer );
+   }
+
+   void operator()( mpi::RecvBuffer & buffer, const PhantomBlock &, boost::any & data )
+   {
+      data = DynamicParMetisBlockInfo( buffer );
+   }
+};
+
+
+} // namespace blockforest
+} // namespace walberla
diff --git a/src/blockforest/loadbalancing/StaticParMetis.cpp b/src/blockforest/loadbalancing/StaticParMetis.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f3ccb4e452c7fca40ba33bdcf0f748cec264c28d
--- /dev/null
+++ b/src/blockforest/loadbalancing/StaticParMetis.cpp
@@ -0,0 +1,250 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file StaticParMetis.cpp
+//! \ingroup blockforest
+//! \author Christian Godenschwager <christian.godenschwager@fau.de>
+//
+//======================================================================================================================
+
+#include "StaticParMetis.h"
+
+#include "core/load_balancing/ParMetisWrapper.h"
+
+#include "core/logging/Logging.h"
+
+#include "core/mpi/BufferSystem.h"
+#include "core/mpi/MPIManager.h"
+#include "core/mpi/Gather.h"
+#include "core/mpi/Gatherv.h"
+#include "core/mpi/Tokenizing.h"
+
+#include "core/timing/Timer.h"
+
+#include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/trim.hpp>
+
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+
+namespace walberla {
+namespace blockforest {
+
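+//! Returns a pointer to the first element of \p v or NULL if \p v is empty, since taking the address of
+//! v.front() on an empty vector would be undefined behavior.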
+template< typename T >
+T * ptr( std::vector<T> & v )
+{
+   if( v.empty() )
+      return NULL;
+   else
+      return &( v.front() );
+}
+
+typedef uint_t idx_t;
+
+
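+//! Assigns a target process to every block of \p forest, partitioning each level separately with ParMetis.
+//! Returns the number of processes that received at least one block.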
+uint_t StaticLevelwiseParMetis::operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t /*perProcessMemoryLimit*/ ) const
+{
+   WcTimer globalTimer, parmetisTimer;
+
+   int numRunnerProcesses = MPIManager::instance()->numProcesses(); // Number of processes running ParMetis (!= number of processes the partition is computed for)
+   int rank = MPIManager::instance()->rank();
+
+   for( uint_t level = forest.getMinLevel(); level <= forest.getMaxLevel(); ++level )
+   {
+      globalTimer.start();
+      WALBERLA_LOG_INFO_ON_ROOT( "Running static ParMetis partitioning on blocks of level " << level );
+
+      std::vector< SetupBlock* > blocks;
+      forest.getBlocks( blocks, level );
+
+      const uint_t numBlocks = blocks.size();
+
+      if( numBlocks < numberOfProcesses )
+      {
+         for( uint_t i = 0; i < numBlocks; ++i )
+            blocks[i]->assignTargetProcess( i );
+
+         WALBERLA_LOG_INFO_ON_ROOT( "Less blocks on level " << level << " (" << numBlocks << ") than processes (" << numberOfProcesses << "). Using simple load balancing."  );
+
+         globalTimer.end();
+         continue;
+      }
+
+      for( uint_t i = 0; i < uint_c( blocks.size() ); ++i )
+      {
+         blocks[i]->setIndex( i );
+      }
+
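+      // Distribute the blocks of this level over the runner processes in contiguous chunks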
+      const uint_t chunkSize  = uint_c( std::ceil( real_c( numBlocks ) / real_c( numRunnerProcesses ) ) );
+      const uint_t chunkBegin = std::min( chunkSize * uint_c( rank ), numBlocks );
+      const uint_t chunkEnd   = std::min( chunkSize * uint_c( rank + 1 ), numBlocks );
+
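+      // vtxdist[p] holds the first global vertex index owned by runner process p; the last entry is the total vertex count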
+      std::vector<int64_t> vtxdist;
+      vtxdist.reserve( uint_c(numRunnerProcesses) + uint_t(1) );
+      for( uint_t i = 0; i < uint_c(numRunnerProcesses); ++i )
+         vtxdist.push_back( int64_c( std::min( i * chunkSize, numBlocks ) ) );
+      vtxdist.push_back( int64_c( numBlocks ) );
+
+      std::vector<int64_t> adjncy, xadj, vwgt, adjwgt;
+      std::vector<double> xyz;
+      std::vector< BlockPair > blockPairs;
+
+      for( uint_t i = chunkBegin; i < chunkEnd; ++i )
+      {
+         const SetupBlock & block = *blocks[i];
+
+         xadj.push_back( int64_c( adjncy.size() ) );
+
+
+         for( auto nit = block.getNeighborhood().begin(); nit != block.getNeighborhood().end(); ++nit )
+         {
+            if( (*nit)->getLevel() != level )
+               continue; // ignore neighbor blocks on other levels
+
+            adjncy.push_back( int64_c( (*nit)->getIndex() ) );
+
+            if(weightsToUse_ == PARMETIS_EDGE_WEIGHTS || weightsToUse_ == PARMETIS_BOTH_WEIGHTS)
+            {
+               blockPairs.push_back( BlockPair( blocks[i], *nit ) );
+            }
+         }
+
+         vwgt.push_back( int64_c( block.getWorkload() ) );
+         Vector3<real_t> center = block.getAABB().center();
+         xyz.push_back( center[0] );
+         xyz.push_back( center[1] );
+         xyz.push_back( center[2] );
+      }
+      xadj.push_back( int64_c( adjncy.size() ) );
+
+      if( weightsToUse_ == PARMETIS_EDGE_WEIGHTS || weightsToUse_ == PARMETIS_BOTH_WEIGHTS )
+      {
+         WALBERLA_ASSERT_EQUAL( adjncy.size(), blockPairs.size() );
+         adjwgt.resize( blockPairs.size(), int64_t(1) );
+         commWeightFunction_( blockPairs, adjwgt );
+
+         if( adjwgt.empty() )
+            adjwgt.push_back( int64_t(0) ); // dummy value to avoid passing a NULL pointer to ParMetis
+      }
+
+      WALBERLA_ASSERT_EQUAL( vtxdist.size(), uint_c(numRunnerProcesses) + uint_t( 1 ) );
+      WALBERLA_ASSERT_EQUAL( uint_c(vtxdist[uint_c(rank)]), chunkBegin );
+      WALBERLA_ASSERT_EQUAL( uint_c(vtxdist[uint_c(rank + 1)]), chunkEnd );
+      WALBERLA_ASSERT_EQUAL( xadj.size(), (chunkEnd - chunkBegin) + 1 );
+      WALBERLA_ASSERT_EQUAL( vwgt.size(), chunkEnd - chunkBegin );
+
+      std::vector<int64_t> part( chunkEnd - chunkBegin, int64_c( MPIManager::instance()->rank() ) );
+
+      int64_t wgtflag = weightsToUse_;
+      int64_t numflag = 0; // C-style ordering
+      int64_t ncon = 1; // Number of constraints
+      int64_t ndims = 3; // Number of dimensions
+      double ubvec[] = { double( 1.05 ) }; // imbalance tolerance
+      int64_t nparts = int64_c( numberOfProcesses ); // number of subdomains
+      MPI_Comm comm = MPIManager::instance()->comm();
+      std::vector<double> tpwgts( uint_c(nparts * ncon), 1.0 / double_c( nparts ) ); // fraction of the total vertex weight assigned to each subdomain
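+      // options: [0] = 1 -> use the values given below, [1] = debug level, [2] = random seed (the fourth entry is unused here)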
+      int64_t options[] = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) };
+
+      // add a dummy element to circumvent ParMetis' NULL pointer check if there are fewer blocks than processes
+      adjncy.resize( std::max( adjncy.size(), size_t(1) ) );
+      part.resize( std::max( part.size(), size_t(1) ) );
+      vwgt.resize( std::max( vwgt.size(), size_t(1) ) );
+
+      int64_t edgecut = 0;
+      int metisResult = core::METIS_ERROR;
+
+      switch( algorithm_ )
+      {
+      case PARMETIS_PART_GEOM_KWAY:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
+      case PARMETIS_PART_KWAY:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
+      }
+
+      if( metisResult != core::METIS_OK )
+         WALBERLA_ABORT("ParMetis failed!");
+
+      part.resize( chunkEnd - chunkBegin ); // drop the dummy element again before gathering the partition
+      std::vector< int64_t > parts = mpi::allGatherv( part, comm );
+
+      WALBERLA_ASSERT_EQUAL( parts.size(), numBlocks );
+
+      for( uint_t i = 0; i < numBlocks; ++i )
+         blocks[i]->assignTargetProcess( uint_c( parts[i] ) );
+
+      globalTimer.end();
+      WALBERLA_LOG_INFO_ON_ROOT( "ParMetis partitioning finished for level " << level << " successfully after " << globalTimer.last() << " s (ParMetis took " << parmetisTimer.last() << " s = " << std::setprecision(2) << parmetisTimer.last() / globalTimer.last() * 100.0 << "%) with an edge cut of " << edgecut );
+   }
+
+   WALBERLA_LOG_INFO_ON_ROOT( "ParMetis partitioning finished after " << globalTimer.total() << " s (ParMetis took " << parmetisTimer.total() << " s = " << std::setprecision(2) << parmetisTimer.total() / globalTimer.total() * 100.0 << "%)" );
+
+   // count the number of processes that received at least one block
+   std::vector<bool> processUsed( numberOfProcesses, false );
+   for(auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt)
+   {
+      processUsed[ blockIt->getTargetProcess() ] = true;
+   }
+
+   return uint_c(std::count( processUsed.begin(), processUsed.end(), true ));
+}
+
+StaticLevelwiseParMetis::Algorithm StaticLevelwiseParMetis::stringToAlgorithm( std::string s )
+{
+   boost::algorithm::to_upper( s );
+   boost::algorithm::trim( s );
+
+   if( s == "PART_GEOM_KWAY" )
+      return PARMETIS_PART_GEOM_KWAY;
+   else if( s == "PART_KWAY" )
+      return PARMETIS_PART_KWAY;
+   else
+      WALBERLA_ABORT( "Illegal ParMetis algorithm specified! Valid choices are: \"PART_GEOM_KWAY\" or \"PART_KWAY\"." );
+}
+
+
+StaticLevelwiseParMetis::WeightsToUse StaticLevelwiseParMetis::stringToWeightsToUse( std::string s )
+{
+   boost::algorithm::to_upper( s );
+   boost::algorithm::trim( s );
+
+   if( s == "NO_WEIGHTS" )
+      return PARMETIS_NO_WEIGHTS;
+   else if( s == "EDGE_WEIGHTS" )
+      return PARMETIS_EDGE_WEIGHTS;
+   else if( s == "VERTEX_WEIGHTS" )
+      return PARMETIS_VERTEX_WEIGHTS;
+   else if( s == "BOTH_WEIGHTS" )
+      return PARMETIS_BOTH_WEIGHTS;
+   else
+      WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"NO_WEIGHTS\", \"EDGE_WEIGHTS\", \"VERTEX_WEIGHTS\", or \"BOTH_WEIGHTS\"." );
+}
+
+
+} // namespace blockforest
+} // namespace walberla
diff --git a/src/blockforest/loadbalancing/StaticParMetis.h b/src/blockforest/loadbalancing/StaticParMetis.h
new file mode 100644
index 0000000000000000000000000000000000000000..deb0a41b6a69e4a67a5ba7a501ca769c33f01c7f
--- /dev/null
+++ b/src/blockforest/loadbalancing/StaticParMetis.h
@@ -0,0 +1,77 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file StaticParMetis.h
+//! \ingroup blockforest
+//! \author Christian Godenschwager <christian.godenschwager@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/SetupBlockForest.h"
+
+#include "core/DataTypes.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/MPIWrapper.h"
+
+#include <boost/function.hpp>
+
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace walberla {
+namespace blockforest {
+
+
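+//! Static load balancer that partitions the blocks of each level of a SetupBlockForest separately using ParMetis.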
+class StaticLevelwiseParMetis
+{
+public:
+   enum Algorithm    { PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY };
+   enum WeightsToUse { PARMETIS_NO_WEIGHTS = 0, PARMETIS_EDGE_WEIGHTS = 1, PARMETIS_VERTEX_WEIGHTS = 2, PARMETIS_BOTH_WEIGHTS = 3 };
+
+   typedef std::pair< const SetupBlock *, const SetupBlock * > BlockPair;
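+   //! Callback that fills \p weights with one communication weight for every edge (block pair) in \p edges.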
+   typedef boost::function<void (const std::vector< BlockPair > & edges, std::vector< int64_t > & weights ) > CommWeightFunction;
+
+   StaticLevelwiseParMetis( const Algorithm algorithm = PARMETIS_PART_GEOM_KWAY )
+      : algorithm_( algorithm ), weightsToUse_( PARMETIS_VERTEX_WEIGHTS ) {}
+
+   StaticLevelwiseParMetis( const CommWeightFunction & commWeightFunction, 
+                            const Algorithm algorithm = PARMETIS_PART_GEOM_KWAY,
+                            const WeightsToUse weightsToUse = PARMETIS_BOTH_WEIGHTS )
+      : algorithm_( algorithm ), weightsToUse_( weightsToUse ), commWeightFunction_( commWeightFunction ) {}
+
+   uint_t operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t perProcessMemoryLimit ) const;
+
+   bool edgeWeightsUsed()   const { return ( weightsToUse_ == PARMETIS_EDGE_WEIGHTS   ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); }
+   bool vertexWeightsUsed() const { return ( weightsToUse_ == PARMETIS_VERTEX_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); }
+
+   static Algorithm    stringToAlgorithm( std::string s );
+   static WeightsToUse stringToWeightsToUse( std::string s );
+
+protected:
+   Algorithm algorithm_;
+   WeightsToUse weightsToUse_;
+   CommWeightFunction commWeightFunction_;
+};
+
+
+
+} // namespace blockforest
+} // namespace walberla
diff --git a/src/core/mpi/MPIHelper.cpp b/src/core/mpi/MPIHelper.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8822c198d4ba19d6f76005c8a34aa6d374245970
--- /dev/null
+++ b/src/core/mpi/MPIHelper.cpp
@@ -0,0 +1,54 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file MPIHelper.cpp
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#include "MPIHelper.h"
+
+namespace walberla {
+namespace mpi {
+
+int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int srcRank)
+{
+   int destRank = -1;
+   MPI_Group srcGroup, destGroup;
+   MPI_Comm_group(srcComm, &srcGroup);
+   MPI_Comm_group(destComm, &destGroup);
+   int src = srcRank;
+   MPI_Group_translate_ranks(srcGroup, 1, &src, destGroup, &destRank);
+   MPI_Group_free(&srcGroup);
+   MPI_Group_free(&destGroup);
+   return destRank;
+}
+
+std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const std::vector<int>& srcRank)
+{
+   std::vector<int> destRank(srcRank.size(), -1);
+   if( srcRank.empty() )
+      return destRank;
+   MPI_Group srcGroup, destGroup;
+   MPI_Comm_group(srcComm, &srcGroup);
+   MPI_Comm_group(destComm, &destGroup);
+   MPI_Group_translate_ranks(srcGroup, int_c(srcRank.size()), const_cast<int*>(&srcRank[0]), destGroup, &destRank[0]);
+   MPI_Group_free(&srcGroup);
+   MPI_Group_free(&destGroup);
+   return destRank;
+}
+
+} // namespace mpi
+} // namespace walberla
diff --git a/src/core/mpi/MPIHelper.h b/src/core/mpi/MPIHelper.h
new file mode 100644
index 0000000000000000000000000000000000000000..6c0d6f943494b847dbaae73de42e35f2909983b2
--- /dev/null
+++ b/src/core/mpi/MPIHelper.h
@@ -0,0 +1,38 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file MPIHelper.h
+//! \author Sebastian Eibl <sebastian.eibl@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "MPIWrapper.h"
+
+#include <vector>
+
+namespace walberla {
+namespace mpi {
+
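+//! Translates a rank from the source communicator to the corresponding rank in the destination communicator.
+//! Returns MPI_UNDEFINED (as reported by MPI_Group_translate_ranks) if the rank has no counterpart in destComm.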
+int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int srcRank);
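+
+//! Element-wise version of translateRank() for a vector of ranks.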
+std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const std::vector<int>& srcRank);
+
+} // namespace mpi
+} // namespace walberla