diff --git a/src/blockforest/loadbalancing/DynamicParMetis.cpp b/src/blockforest/loadbalancing/DynamicParMetis.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a612d47759d6e5425c9b699ef290cd1237387c7b --- /dev/null +++ b/src/blockforest/loadbalancing/DynamicParMetis.cpp @@ -0,0 +1,312 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file DynamicParMetis.cpp +//! \ingroup blockforest +//! \author Christian Godenschwager <christian.godenschwager@fau.de> +// +//====================================================================================================================== + +#include "DynamicParMetis.h" + +#include "core/load_balancing/ParMetisWrapper.h" + +#include "core/logging/Logging.h" + +#include "core/mpi/BufferSystem.h" +#include "core/mpi/MPIHelper.h" +#include "core/mpi/MPIManager.h" +#include "core/mpi/Gather.h" +#include "core/mpi/Gatherv.h" + +#include "core/timing/Timer.h" + +#include <boost/algorithm/string/case_conv.hpp> +#include <boost/algorithm/string/trim.hpp> + +namespace walberla { +namespace blockforest { + +std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phantomForest, MPI_Comm comm ) +{ + const uint_t rank = uint_c(mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, MPIManager::instance()->rank())); + + uint_t numLocalBlocks = phantomForest.getNumberOfBlocks(); + + size_t sequenceStartOnProcess = 0; + MPI_Exscan( &numLocalBlocks, &sequenceStartOnProcess, 1, MPITrait<uint_t>::type(), MPI_SUM, comm ); + if( rank == 0 ) + sequenceStartOnProcess = uint_t( 0 ); + + return std::make_pair( sequenceStartOnProcess, sequenceStartOnProcess + numLocalBlocks ); +} + +std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest & phantomForest, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm ) +{ + std::map< blockforest::BlockID, uint_t > mapping; + + const auto & blockMap = phantomForest.getBlockMap(); + uint_t sequenceId = blockSequenceRange.first; + for( auto it = blockMap.begin(); it != blockMap.end(); ++it ) + mapping.insert( std::make_pair( it->first, sequenceId++ ) ); + WALBERLA_ASSERT_EQUAL( sequenceId, blockSequenceRange.second ); + + const std::vector<uint_t> neighborProcesses = phantomForest.getNeighboringProcesses(); + + mpi::BufferSystem bs( comm ); + + for( auto it = neighborProcesses.begin(); it != neighborProcesses.end(); ++it ) + bs.sendBuffer( mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, int_c(*it)) ) << mapping; + + bs.setReceiverInfoFromSendBufferState( false, true ); + + bs.sendAll(); + + for( auto it = bs.begin(); it != bs.end(); ++it ) + { + std::map< blockforest::BlockID, uint_t > remoteMapping; + it.buffer() >> remoteMapping; + + for( auto remoteIt = 
remoteMapping.begin(); remoteIt != remoteMapping.end(); ++remoteIt ) + { + auto result = mapping.insert( *remoteIt ); + WALBERLA_UNUSED( result ); + WALBERLA_ASSERT( result.second, "BlockId should be unique!" ); + } + } + + return mapping; +} + +template< typename T > +T * ptr( std::vector<T> & v ) +{ + if( v.empty() ) + return NULL; + else + return &( v.front() ); +} + +typedef uint_t idx_t; + +bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess, + std::set< uint_t > & processesToRecvFrom, + const PhantomBlockForest & phantomForest, + const uint_t /*iteration*/ ) const +{ + WcTimer globalTimer; + WcTimer parmetisTimer; + globalTimer.start(); + + //create new communicator which excludes processes which do not have blocks + MPI_Comm subComm; + MPI_Group allGroup, subGroup; + MPI_Comm_group( MPIManager::instance()->comm(), &allGroup ); + std::vector<int> ranks; + if (phantomForest.getNumberOfBlocks() > 0) + ranks.push_back( MPIManager::instance()->rank() ); + ranks = mpi::allGatherv( ranks, MPIManager::instance()->comm() ); + auto numSubProcesses = ranks.size(); + WALBERLA_UNUSED(numSubProcesses); + MPI_Group_incl(allGroup, int_c(ranks.size()), &ranks[0], &subGroup); + MPI_Comm_create( MPIManager::instance()->comm(), subGroup, &subComm); + + int64_t edgecut = 0; + std::vector<int64_t> part( phantomForest.getNumberOfBlocks(), int64_c( MPIManager::instance()->rank() ) ); + + if (subComm != MPI_COMM_NULL) + { + const std::pair<uint_t, uint_t> blockSequenceRange = getBlockSequenceRange( phantomForest, subComm ); + const std::map< blockforest::BlockID, uint_t > mapping = getBlockIdToSequenceMapping( phantomForest, blockSequenceRange, subComm ); + + std::vector<int64_t> vtxdist = mpi::allGather( int64_c( blockSequenceRange.second ), subComm ); + vtxdist.insert( vtxdist.begin(), uint_t( 0 ) ); + + std::vector<int64_t> adjncy, xadj, vsize, vwgt, adjwgt; + std::vector<double> xyz; + + for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it ) + { + xadj.push_back( int64_c( adjncy.size() ) ); + const PhantomBlock & block = *( it->first ); + auto bi = block.getData< DynamicParMetisBlockInfo >(); + + switch( edgeSource_ ) + { + case PARMETIS_EDGES_FROM_FOREST: + for( auto nit = block.getNeighborhood().begin(); nit != block.getNeighborhood().end(); ++nit ) + { + auto mapIt = mapping.find( nit->getId() ); + WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" ); + adjncy.push_back( int64_c( mapIt->second ) ); + auto edgeWeightIt = bi.getEdgeWeights().find( nit->getId() ); + adjwgt.push_back( edgeWeightIt == bi.getEdgeWeights().end() ? int64_t( 0 ) : edgeWeightIt->second ); + } + break; + case PARMETIS_EDGES_FROM_EDGE_WEIGHTS: + for( auto edgeIt = bi.getEdgeWeights().begin(); edgeIt != bi.getEdgeWeights().end(); ++edgeIt ) + { + auto mapIt = mapping.find( edgeIt->first ); + WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" 
); + adjncy.push_back( int64_c( mapIt->second ) ); + adjwgt.push_back( edgeIt->second ); + } + } + vwgt.push_back( bi.getVertexWeight() ); + vsize.push_back( bi.getVertexSize() ); + xyz.push_back( bi.getVertexCoords()[0] ); + xyz.push_back( bi.getVertexCoords()[1] ); + xyz.push_back( bi.getVertexCoords()[2] ); + } + xadj.push_back( int64_c( adjncy.size() ) ); + + WALBERLA_ASSERT_EQUAL( vtxdist.size(), numSubProcesses + uint_t( 1 ) ); + WALBERLA_ASSERT_EQUAL( xadj.size(), phantomForest.getNumberOfBlocks() + 1 ); + WALBERLA_ASSERT_EQUAL( vwgt.size(), phantomForest.getNumberOfBlocks() ); + WALBERLA_ASSERT_EQUAL( vsize.size(), phantomForest.getNumberOfBlocks() ); + WALBERLA_ASSERT_EQUAL( adjncy.size(), adjwgt.size() ); + + int64_t wgtflag = weightsToUse_; + int64_t numflag = 0; // C-style ordering + int64_t ncon = 1; // Number of constraints + int64_t ndims = 3; // Number of dimensions + double ubvec[] = { real_t( 1.05 ) }; // imbalance tolerance + int64_t nparts = int64_c( MPIManager::instance()->numProcesses() ); // number of subdomains + double ipc2redist = real_t( 1000000.0 ); // compute repartitioning with low edge cut (set lower (down to 0.000001) to get minimal repartitioning ) + MPI_Comm comm = subComm; //MPIManager::instance()->comm(); + std::vector<double> tpwgts( uint_c(nparts * ncon), 1.0 / double_c( nparts ) ); // vertex weight fraction that is stored in a subdomain + int64_t options[] = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) }; + + int metisResult = core::METIS_OK; + + switch( algorithm_ ) + { + case PARMETIS_PART_GEOM_KWAY: + parmetisTimer.start(); + metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm ); + parmetisTimer.end(); + break; + case PARMETIS_PART_KWAY: + parmetisTimer.start(); + metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm ); + parmetisTimer.end(); + break; + case PARMETIS_ADAPTIVE_REPART: + parmetisTimer.start(); + metisResult = core::ParMETIS_V3_AdaptiveRepart( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( vsize ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, &ipc2redist, options, &edgecut, ptr( part ), &comm ); + parmetisTimer.end(); + break; + case PARMETIS_REFINE_KWAY: + parmetisTimer.start(); + metisResult = core::ParMETIS_V3_RefineKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm ); + parmetisTimer.end(); + break; + } + + if( metisResult != core::METIS_OK ) + WALBERLA_ABORT("ParMetis failed!"); + } + + // Determine which processes will receive a block from this process + std::vector<uint8_t> isSendingBlockToProcess( uint_c(MPIManager::instance()->numProcesses()), uint8_t( 0 ) ); + for( auto it = part.begin(); it != part.end(); ++it ) + { + WALBERLA_ASSERT_GREATER_EQUAL( *it, 0 ); + WALBERLA_ASSERT_LESS( *it, MPIManager::instance()->numProcesses() ); + isSendingBlockToProcess[uint_c(*it)] = uint8_t( 1 ); + } + isSendingBlockToProcess[uint_c(MPIManager::instance()->rank())] = uint8_t( 0 ); + + std::vector<uint8_t> isReceivingBlockFromProcess( uint_c(MPIManager::instance()->numProcesses()), uint8_t( 0 ) ); + MPI_Alltoall( ptr( 
isSendingBlockToProcess ), 1, MPITrait<uint8_t>::type(), ptr( isReceivingBlockFromProcess ), 1, MPITrait<uint8_t>::type(), MPIManager::instance()->comm() ); + for( uint_t i = 0; i < isReceivingBlockFromProcess.size(); ++i ) + if( isReceivingBlockFromProcess[i] == uint8_t( 1 ) ) + processesToRecvFrom.insert( i ); + + // assign target processes to blocks + + uint_t i = 0; + for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it, ++i ) + { + it->second = uint_c(part[i]); + } + + globalTimer.end(); + if (subComm != MPI_COMM_NULL) + { + int rank = -1; + MPI_Comm_rank( subComm, &rank); + if (rank == 0) WALBERLA_LOG_INFO("ParMetis finished successfully after " << globalTimer.last() << " s (ParMetis took " << parmetisTimer.last() << " s = " << std::setprecision(2) << parmetisTimer.last() / globalTimer.last() * 100.0 << "%) with an edge cut of " << edgecut ); + } + + MPI_Group_free(&allGroup); + MPI_Group_free(&subGroup); + //MPI_Comm_free(&subComm); + + return false; // no further iterations +} + +DynamicParMetis::Algorithm DynamicParMetis::stringToAlgorithm( std::string s ) +{ + boost::algorithm::to_upper( s ); + boost::algorithm::trim( s ); + + if( s == "PART_GEOM_KWAY" ) + return PARMETIS_PART_GEOM_KWAY; + else if( s == "PART_KWAY" ) + return PARMETIS_PART_KWAY; + else if( s == "PART_ADAPTIVE_REPART" ) + return PARMETIS_ADAPTIVE_REPART; + else if( s == "REFINE_KWAY" ) + return PARMETIS_REFINE_KWAY; + else + WALBERLA_ABORT( "Illegal ParMetis algorithm specified! Valid choices are: \"PART_GEOM_KWAY\", \"PART_KWAY\", \"PART_ADAPTIVE_REPART\", or \"REFINE_KWAY\"." ); +} + + +DynamicParMetis::WeightsToUse DynamicParMetis::stringToWeightsToUse( std::string s ) +{ + boost::algorithm::to_upper( s ); + boost::algorithm::trim( s ); + + if( s == "NO_WEIGHTS" ) + return PARMETIS_NO_WEIGHTS; + else if( s == "EDGE_WEIGHTS" ) + return PARMETIS_EDGE_WEIGHTS; + else if( s == "VERTEX_WEIGHTS" ) + return PARMETIS_VERTEX_WEIGHTS; + else if( s == "BOTH_WEIGHTS" ) + return PARMETIS_BOTH_WEIGHTS; + else + WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"NO_WEIGHTS\", \"EDGE_WEIGHTS\", \"VERTEX_WEIGHTS\", or \"BOTH_WEIGHTS\"." ); +} + + +DynamicParMetis::EdgeSource DynamicParMetis::stringToEdgeSource( std::string s ) +{ + boost::algorithm::to_upper( s ); + boost::algorithm::trim( s ); + + if( s == "EDGES_FROM_FOREST" ) + return PARMETIS_EDGES_FROM_FOREST; + else if( s == "EDGES_FROM_EDGE_WEIGHTS" ) + return PARMETIS_EDGES_FROM_EDGE_WEIGHTS; + else + WALBERLA_ABORT( "Illegal ParMetis edge source specified! Valid choices are: \"EDGES_FROM_FOREST\" or \"EDGES_FROM_EDGE_WEIGHTS\"." ); +} + + +} // namespace blockforest +} // namespace walberla diff --git a/src/blockforest/loadbalancing/DynamicParMetis.h b/src/blockforest/loadbalancing/DynamicParMetis.h new file mode 100644 index 0000000000000000000000000000000000000000..a39bc7c76238913d72dd3fb57d2e1dbe8dd2b298 --- /dev/null +++ b/src/blockforest/loadbalancing/DynamicParMetis.h @@ -0,0 +1,133 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version.
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file DynamicParMetis.h +//! \ingroup blockforest +//! \author Christian Godenschwager <christian.godenschwager@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "blockforest/PhantomBlockForest.h" + +#include "core/DataTypes.h" +#include "core/math/Vector3.h" +#include "core/mpi/MPIWrapper.h" + +#include <map> + +namespace walberla { +namespace blockforest { + +std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phantomForest, MPI_Comm comm ); + +std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest & phantomForest, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm ); + + +class DynamicParMetis +{ +public: + enum Algorithm { PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY, PARMETIS_ADAPTIVE_REPART, PARMETIS_REFINE_KWAY }; + enum WeightsToUse { PARMETIS_NO_WEIGHTS = 0, PARMETIS_EDGE_WEIGHTS = 1, PARMETIS_VERTEX_WEIGHTS = 2, PARMETIS_BOTH_WEIGHTS = 3 }; + enum EdgeSource { PARMETIS_EDGES_FROM_FOREST, PARMETIS_EDGES_FROM_EDGE_WEIGHTS }; + + DynamicParMetis( const Algorithm algorithm = PARMETIS_PART_GEOM_KWAY, + const WeightsToUse weightsToUse = PARMETIS_BOTH_WEIGHTS, + const EdgeSource edgeSource = PARMETIS_EDGES_FROM_EDGE_WEIGHTS ) + : algorithm_( algorithm ), weightsToUse_( weightsToUse ), edgeSource_( edgeSource ) {} + + bool operator()( std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess, + std::set< uint_t > & processesToRecvFrom, + const PhantomBlockForest & phantomForest, + const uint_t iteration ) const; + + bool edgeWeightsUsed() const { return ( weightsToUse_ == PARMETIS_EDGE_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); } + bool vertexWeightsUsed() const { return ( weightsToUse_ == PARMETIS_VERTEX_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); } + bool vertexSizeUsed() const { return algorithm_ == PARMETIS_ADAPTIVE_REPART; } + + static Algorithm stringToAlgorithm( std::string s ); + static WeightsToUse stringToWeightsToUse( std::string s ); + static EdgeSource stringToEdgeSource( std::string s ); + +protected: + Algorithm algorithm_; + WeightsToUse weightsToUse_; + EdgeSource edgeSource_; +}; + +class DynamicParMetisBlockInfo +{ +public: + + typedef int64_t weight_t; + typedef int64_t vsize_t; + + DynamicParMetisBlockInfo( const weight_t vertexWeight ) + : vertexWeight_(vertexWeight), vertexSize_(1) + { } + + DynamicParMetisBlockInfo( mpi::RecvBuffer & buffer ) + { + buffer >> vertexWeight_ >> vertexSize_ >> vertexCoords_ >> edgeWeights_; + } + + weight_t getVertexWeight() const { return vertexWeight_; } + void setVertexWeight( const weight_t vertexWeight ) { vertexWeight_ = vertexWeight; } + + vsize_t getVertexSize() const { return vertexSize_; } + void setVertexSize( const vsize_t size ) { vertexSize_ = size; } + + const Vector3<double> & getVertexCoords() const { return vertexCoords_; } + void setVertexCoords( const Vector3<double> & p ) { vertexCoords_ = p; } + + void setEdgeWeight( const blockforest::BlockID & blockId, const weight_t edgeWeight ) {
edgeWeights_[blockId] = edgeWeight; } + void setEdgeWeights( const std::map< blockforest::BlockID, weight_t > & edgeWeights ) { edgeWeights_ = edgeWeights; } + + const std::map< blockforest::BlockID, weight_t > & getEdgeWeights() const { return edgeWeights_; } + + void toBuffer( mpi::SendBuffer & buffer ) + { + buffer << vertexWeight_ << vertexSize_ << vertexCoords_ << edgeWeights_; + } + +private: + + weight_t vertexWeight_; /// Defines the weight of a block + vsize_t vertexSize_; /// Defines the cost of rebalancing a block + Vector3<double> vertexCoords_; /// Defines where the block is located in space. Needed by some ParMetis algorithms + std::map< blockforest::BlockID, weight_t > edgeWeights_; /// Defines the cost of communication with other blocks +}; + + +struct DynamicParMetisBlockInfoPackUnpack +{ + void operator()( mpi::SendBuffer & buffer, const PhantomBlock & block ) + { + block.getData< DynamicParMetisBlockInfo >().toBuffer( buffer ); + } + + void operator()( mpi::RecvBuffer & buffer, const PhantomBlock &, boost::any & data ) + { + data = DynamicParMetisBlockInfo( buffer ); + } +}; + + + + + + + +} // namespace blockforest +} // namespace walberla diff --git a/src/blockforest/loadbalancing/StaticParMetis.cpp b/src/blockforest/loadbalancing/StaticParMetis.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3ccb4e452c7fca40ba33bdcf0f748cec264c28d --- /dev/null +++ b/src/blockforest/loadbalancing/StaticParMetis.cpp @@ -0,0 +1,238 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file StaticParMetis.cpp +//! \ingroup blockforest +//! 
\author Christian Godenschwager <christian.godenschwager@fau.de> +// +//====================================================================================================================== + +#include "StaticParMetis.h" + +#include "core/load_balancing/ParMetisWrapper.h" + +#include "core/logging/Logging.h" + +#include "core/mpi/BufferSystem.h" +#include "core/mpi/MPIManager.h" +#include "core/mpi/Gather.h" +#include "core/mpi/Gatherv.h" +#include "core/mpi/Tokenizing.h" + +#include "core/timing/Timer.h" + +#include <boost/algorithm/string/case_conv.hpp> +#include <boost/algorithm/string/trim.hpp> + +namespace walberla { +namespace blockforest { + +template< typename T > +T * ptr( std::vector<T> & v ) +{ + if( v.empty() ) + return NULL; + else + return &( v.front() ); +} + +typedef uint_t idx_t; + + +uint_t StaticLevelwiseParMetis::operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t /*perProcessMemoryLimit*/ ) const +{ + WcTimer globalTimer, parmetisTimer; + + int numRunnerProcesses = MPIManager::instance()->numProcesses(); // Number of processes running ParMetis (!= number of processes the partition is computed for) + int rank = MPIManager::instance()->rank(); + + for( uint_t level = forest.getMinLevel(); level <= forest.getMaxLevel(); ++level ) + { + globalTimer.start(); + WALBERLA_LOG_INFO_ON_ROOT( "Running static ParMetis partitioning on blocks of level " << level ); + + std::vector< SetupBlock* > blocks; + forest.getBlocks( blocks, level ); + + const uint_t numBlocks = blocks.size(); + + if( numBlocks < numberOfProcesses ) + { + for( uint_t i = 0; i < numBlocks; ++i ) + blocks[i]->assignTargetProcess( i ); + + WALBERLA_LOG_INFO_ON_ROOT( "Fewer blocks on level " << level << " (" << numBlocks << ") than processes (" << numberOfProcesses << "). Using simple load balancing."
); + + globalTimer.end(); + continue; + } + + for( uint_t i = 0; i < uint_c( blocks.size() ); ++i ) + { + blocks[i]->setIndex( i ); + } + + const uint_t chunkSize = uint_c( std::ceil( real_c( numBlocks ) / real_c( numRunnerProcesses ) ) ); + const uint_t chunkBegin = std::min( chunkSize * uint_c( rank ), numBlocks ); + const uint_t chunkEnd = std::min( chunkSize * uint_c( rank + 1 ), numBlocks ); + + std::vector<int64_t> vtxdist; + vtxdist.reserve( uint_c(numRunnerProcesses) + uint_t(1) ); + for( uint_t i = 0; i < uint_c(numRunnerProcesses); ++i ) + vtxdist.push_back( int64_c( std::min( i * chunkSize, numBlocks ) ) ); + vtxdist.push_back( int64_t( forest.getNumberOfBlocks() ) ); + + std::vector<int64_t> adjncy, xadj, vwgt, adjwgt; + std::vector<double> xyz; + std::vector< BlockPair > blockPairs; + + for( uint_t i = chunkBegin; i < chunkEnd; ++i ) + { + const SetupBlock & block = *blocks[i]; + + xadj.push_back( int64_c( adjncy.size() ) ); + + + for( auto nit = block.getNeighborhood().begin(); nit != block.getNeighborhood().end(); ++nit ) + { + if( (*nit)->getLevel() != level ) + continue; // ignore neighbor blocks on other levels + + adjncy.push_back( int64_c( (*nit)->getIndex() ) ); + + if(weightsToUse_ == PARMETIS_EDGE_WEIGHTS || weightsToUse_ == PARMETIS_BOTH_WEIGHTS) + { + blockPairs.push_back( BlockPair( blocks[i], *nit ) ); + } + } + + vwgt.push_back( int64_c( block.getWorkload() ) ); + Vector3<real_t> center = block.getAABB().center(); + xyz.push_back( center[0] ); + xyz.push_back( center[1] ); + xyz.push_back( center[2] ); + } + xadj.push_back( int64_c( adjncy.size() ) ); + + if( weightsToUse_ == PARMETIS_EDGE_WEIGHTS || weightsToUse_ == PARMETIS_BOTH_WEIGHTS ) + { + WALBERLA_ASSERT_EQUAL( adjncy.size(), blockPairs.size() ); + adjwgt.resize( blockPairs.size(), int64_t(1) ); + commWeightFunction_( blockPairs, adjwgt ); + + if( adjwgt.empty() ) + adjwgt.push_back( int64_t(0) ); // dummy value to circumvent dumb NULL pointer check of ParMetis + } + + WALBERLA_ASSERT_EQUAL( vtxdist.size(), uint_c(numRunnerProcesses) + uint_t( 1 ) ); + WALBERLA_ASSERT_EQUAL( uint_c(vtxdist[uint_c(rank)]), chunkBegin ); + WALBERLA_ASSERT_EQUAL( uint_c(vtxdist[uint_c(rank + 1)]), chunkEnd ); + WALBERLA_ASSERT_EQUAL( xadj.size(), (chunkEnd - chunkBegin) + 1 ); + WALBERLA_ASSERT_EQUAL( vwgt.size(), chunkEnd - chunkBegin ); + + std::vector<int64_t> part( chunkEnd - chunkBegin, int64_c( MPIManager::instance()->rank() ) ); + + int64_t wgtflag = weightsToUse_; + int64_t numflag = 0; // C-style ordering + int64_t ncon = 1; // Number of constraints + int64_t ndims = 3; // Number of dimensions + double ubvec[] = { real_t( 1.05 ) }; // imbalance tolerance + int64_t nparts = int64_c( numberOfProcesses ); // number of subdomains + MPI_Comm comm = MPIManager::instance()->comm(); + std::vector<double> tpwgts( uint_c(nparts * ncon), 1.0 / double_c( nparts ) ); // vertex weight fraction that is stored in a subdomain + int64_t options[] = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) }; + + // add dummy element to circumvent null pointer check if less blocks than processes + adjncy.resize( std::max( adjncy.size(), size_t(1) ) ); + part.resize( std::max( part.size(), size_t(1) ) ); + vwgt.resize( std::max( vwgt.size(), size_t(1) ) ); + + int64_t edgecut = 0; + int metisResult = core::METIS_ERROR; + + switch( algorithm_ ) + { + case PARMETIS_PART_GEOM_KWAY: + parmetisTimer.start(); + metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, 
&numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm ); + parmetisTimer.end(); + break; + case PARMETIS_PART_KWAY: + parmetisTimer.start(); + metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm ); + parmetisTimer.end(); + break; + } + + if( metisResult != core::METIS_OK ) + WALBERLA_ABORT("ParMetis failed!"); + + std::vector< int64_t > parts = mpi::allGatherv( part, comm ); + + WALBERLA_ASSERT_EQUAL( parts.size(), numBlocks ); + + for( uint_t i = 0; i < numBlocks; ++i ) + blocks[i]->assignTargetProcess( uint_c( parts[i] ) ); + + globalTimer.end(); + WALBERLA_LOG_INFO_ON_ROOT( "ParMetis partitioning finished for level " << level << " successfully after " << globalTimer.last() << " s (ParMetis took " << parmetisTimer.last() << " s = " << std::setprecision(2) << parmetisTimer.last() / globalTimer.last() * 100.0 << "%) with an edge cut of " << edgecut ); + } + + WALBERLA_LOG_INFO_ON_ROOT( "ParMetis partitioning finished after " << globalTimer.total() << " s (ParMetis took " << parmetisTimer.total() << " s = " << std::setprecision(2) << parmetisTimer.total() / globalTimer.total() * 100.0 << "%)" ); + + //count number of used processes + std::vector<bool> processUsed( numberOfProcesses, false ); + for(auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt) + { + processUsed[ blockIt->getTargetProcess() ] = true; + } + + return uint_c(std::count( processUsed.begin(), processUsed.end(), true )); +} + +StaticLevelwiseParMetis::Algorithm StaticLevelwiseParMetis::stringToAlgorithm( std::string s ) +{ + boost::algorithm::to_upper( s ); + boost::algorithm::trim( s ); + + if( s == "PART_GEOM_KWAY" ) + return PARMETIS_PART_GEOM_KWAY; + else if( s == "PART_KWAY" ) + return PARMETIS_PART_KWAY; + else + WALBERLA_ABORT( "Illegal ParMetis algorithm specified! Valid choices are: \"PART_GEOM_KWAY\" or \"PART_KWAY\"." ); +} + + +StaticLevelwiseParMetis::WeightsToUse StaticLevelwiseParMetis::stringToWeightsToUse( std::string s ) +{ + boost::algorithm::to_upper( s ); + boost::algorithm::trim( s ); + + if( s == "NO_WEIGHTS" ) + return PARMETIS_NO_WEIGHTS; + else if( s == "EDGE_WEIGHTS" ) + return PARMETIS_EDGE_WEIGHTS; + else if( s == "VERTEX_WEIGHTS" ) + return PARMETIS_VERTEX_WEIGHTS; + else if( s == "BOTH_WEIGHTS" ) + return PARMETIS_BOTH_WEIGHTS; + else + WALBERLA_ABORT( "Illegal ParMetis weights usage specified! Valid choices are: \"NO_WEIGHTS\", \"EDGE_WEIGHTS\", \"VERTEX_WEIGHTS\", or \"BOTH_WEIGHTS\"." ); +} + + +} // namespace blockforest +} // namespace walberla diff --git a/src/blockforest/loadbalancing/StaticParMetis.h b/src/blockforest/loadbalancing/StaticParMetis.h new file mode 100644 index 0000000000000000000000000000000000000000..deb0a41b6a69e4a67a5ba7a501ca769c33f01c7f --- /dev/null +++ b/src/blockforest/loadbalancing/StaticParMetis.h @@ -0,0 +1,69 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file StaticParMetis.h +//! \ingroup blockforest +//! \author Christian Godenschwager <christian.godenschwager@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "blockforest/SetupBlockForest.h" + +#include "core/DataTypes.h" +#include "core/math/Vector3.h" +#include "core/mpi/MPIWrapper.h" + +#include <boost/function.hpp> + +#include <map> +#include <vector> + +namespace walberla { +namespace blockforest { + + +class StaticLevelwiseParMetis +{ +public: + enum Algorithm { PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY }; + enum WeightsToUse { PARMETIS_NO_WEIGHTS = 0, PARMETIS_EDGE_WEIGHTS = 1, PARMETIS_VERTEX_WEIGHTS = 2, PARMETIS_BOTH_WEIGHTS = 3 }; + + typedef std::pair< const SetupBlock *, const SetupBlock * > BlockPair; + typedef boost::function<void (const std::vector< BlockPair > & edges, std::vector< int64_t > & weights ) > CommWeightFunction; + + StaticLevelwiseParMetis( const Algorithm algorithm = PARMETIS_PART_GEOM_KWAY ) + : algorithm_( algorithm ), weightsToUse_( PARMETIS_VERTEX_WEIGHTS ) {} + + StaticLevelwiseParMetis( const CommWeightFunction & commWeightFunction, + const Algorithm algorithm = PARMETIS_PART_GEOM_KWAY, + const WeightsToUse weightsToUse = PARMETIS_BOTH_WEIGHTS ) + : algorithm_( algorithm ), weightsToUse_( weightsToUse ), commWeightFunction_( commWeightFunction ) {} + + uint_t operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t perProcessMemoryLimit ) const; + + bool edgeWeightsUsed() const { return ( weightsToUse_ == PARMETIS_EDGE_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); } + bool vertexWeightsUsed() const { return ( weightsToUse_ == PARMETIS_VERTEX_WEIGHTS ) || ( weightsToUse_ == PARMETIS_BOTH_WEIGHTS ); } + + static Algorithm stringToAlgorithm( std::string s ); + static WeightsToUse stringToWeightsToUse( std::string s ); + +protected: + Algorithm algorithm_; + WeightsToUse weightsToUse_; + CommWeightFunction commWeightFunction_; +}; + + + +} // namespace blockforest +} // namespace walberla \ No newline at end of file diff --git a/src/core/mpi/MPIHelper.cpp b/src/core/mpi/MPIHelper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8822c198d4ba19d6f76005c8a34aa6d374245970 --- /dev/null +++ b/src/core/mpi/MPIHelper.cpp @@ -0,0 +1,51 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt).
If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIHelper.cpp +//! \author Sebastian Eibl <sebastian.eibl@fau.de> +// +//====================================================================================================================== + +#include "MPIHelper.h" + +namespace walberla { +namespace mpi { + +int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int srcRank) +{ + int destRank = -1; + MPI_Group srcGroup, destGroup; + MPI_Comm_group(srcComm, &srcGroup); + MPI_Comm_group(destComm, &destGroup); + MPI_Group_translate_ranks(srcGroup, 1, const_cast<int*>(&srcRank), destGroup, &destRank); + MPI_Group_free(&srcGroup); + MPI_Group_free(&destGroup); + return destRank; +} + +std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const std::vector<int>& srcRank) +{ + std::vector<int> destRank(srcRank.size(), -1); + MPI_Group srcGroup, destGroup; + MPI_Comm_group(srcComm, &srcGroup); + MPI_Comm_group(destComm, &destGroup); + MPI_Group_translate_ranks(srcGroup, int_c(srcRank.size()), const_cast<int*>(&srcRank[0]), destGroup, &destRank[0]); + MPI_Group_free(&srcGroup); + MPI_Group_free(&destGroup); + return destRank; +} + +} // namespace mpi +} // namespace walberla diff --git a/src/core/mpi/MPIHelper.h b/src/core/mpi/MPIHelper.h new file mode 100644 index 0000000000000000000000000000000000000000..6c0d6f943494b847dbaae73de42e35f2909983b2 --- /dev/null +++ b/src/core/mpi/MPIHelper.h @@ -0,0 +1,34 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file MPIHelper.h +//! \author Sebastian Eibl <sebastian.eibl@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "MPIWrapper.h" + +#include <vector> + +namespace walberla { +namespace mpi { + +int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int srcRank); +std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const std::vector<int>& srcRank); + +} // namespace mpi +} // namespace walberla
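
Usage note (not part of the patch): the sketch below shows one way the new dynamic balancer could be wired into a BlockForest refresh cycle. It is a minimal sketch only, assuming the existing refresh callback setters (setRefreshPhantomBlockDataAssignmentFunction, setRefreshPhantomBlockDataPackFunction, setRefreshPhantomBlockDataUnpackFunction, setRefreshPhantomBlockMigrationPreparationFunction); the unit vertex weight and the helper name setupParMetisLoadBalancing are placeholders, not part of this change. The static variant, StaticLevelwiseParMetis, is instead meant to be handed to SetupBlockForest::balanceLoad() during domain setup.

#include <boost/any.hpp>
#include <utility>
#include <vector>

#include "blockforest/BlockForest.h"
#include "blockforest/PhantomBlock.h"
#include "blockforest/PhantomBlockForest.h"
#include "blockforest/loadbalancing/DynamicParMetis.h"

namespace walberla {

// Hypothetical helper: registers the ParMetis-based refresh callbacks on a BlockForest.
void setupParMetisLoadBalancing( blockforest::BlockForest & forest )
{
   // Attach a DynamicParMetisBlockInfo to every phantom block. The vertex weight of 1 is a
   // placeholder for an application-specific load estimate.
   forest.setRefreshPhantomBlockDataAssignmentFunction(
      []( std::vector< std::pair< const blockforest::PhantomBlock *, boost::any > > & blockData,
          const blockforest::PhantomBlockForest & )
      {
         for( auto & entry : blockData )
         {
            blockforest::DynamicParMetisBlockInfo info( int64_t( 1 ) );
            const auto center = entry.first->getAABB().center(); // coordinates are only used by PART_GEOM_KWAY
            info.setVertexCoords( Vector3<double>( double_c( center[0] ), double_c( center[1] ), double_c( center[2] ) ) );
            entry.second = info;
         }
      } );

   // Serialize/deserialize the block info when phantom blocks migrate between processes.
   forest.setRefreshPhantomBlockDataPackFunction( blockforest::DynamicParMetisBlockInfoPackUnpack() );
   forest.setRefreshPhantomBlockDataUnpackFunction( blockforest::DynamicParMetisBlockInfoPackUnpack() );

   // Let ParMetis compute the new block-to-process assignment during the next refresh.
   forest.setRefreshPhantomBlockMigrationPreparationFunction(
      blockforest::DynamicParMetis( blockforest::DynamicParMetis::PARMETIS_ADAPTIVE_REPART ) );
}

} // namespace walberla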