Commit 7fe55a7f authored by Marcel Koch

add roundRobinVolume to Distributed balancer

This requires an MPI all-to-all communication; I am not sure whether that could be handled otherwise (see the sketch below).
parent d6faaab4
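The all-to-all mentioned in the commit message only exchanges per-rank primitive counts. Below is a minimal standalone sketch of that count exchange, using plain MPI calls instead of waLBerla's wrappers; the function name and the use of unsigned long for the counts are illustrative assumptions, not part of the commit.

#include <mpi.h>

#include <numeric>
#include <vector>

// Given the number of primitives this rank sends to every other rank (one entry per rank),
// return how many primitives this rank will receive in total.
unsigned long totalReceivedPrimitives( const std::vector< unsigned long >& numSentPerRank, MPI_Comm comm )
{
   std::vector< unsigned long > numReceivedPerRank( numSentPerRank.size(), 0 );
   // Every rank contributes one count per destination rank and obtains one count per source rank.
   MPI_Alltoall( numSentPerRank.data(), 1, MPI_UNSIGNED_LONG, numReceivedPerRank.data(), 1, MPI_UNSIGNED_LONG, comm );
   return std::accumulate( numReceivedPerRank.begin(), numReceivedPerRank.end(), 0ul );
}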
@@ -21,6 +21,7 @@
#include "hyteg/primitivestorage/loadbalancing/DistributedBalancer.hpp"
#include <algorithm>
#include <numeric>
#include <queue>
#include "core/DataTypes.h"
@@ -378,6 +379,123 @@ MigrationInfo roundRobinInterval( PrimitiveStorage& storage, uint_t interval, ui
return migrationInfo;
}
MigrationInfo roundRobinVolume( PrimitiveStorage& storage, uint_t minRank, uint_t maxRank )
{
WALBERLA_CHECK_LESS_EQUAL( minRank, maxRank );
WALBERLA_CHECK( maxRank < uint_c( walberla::mpi::MPIManager::instance()->numProcesses() ),
"Cannot distribute to more than available processes." );
const uint_t numProcesses = uint_c( walberla::mpi::MPIManager::instance()->numProcesses() );
const uint_t numReceivingProcesses = maxRank - minRank + 1;
const uint_t rank = uint_c( walberla::mpi::MPIManager::instance()->rank() );
MigrationMap_T migrationMap;
uint_t numReceivingPrimitives = 0;
// Cache the number of primitives per rank to significantly improve the performance of this function.
std::vector< uint_t > numPrimitivesPerRank( numProcesses, 0 );
std::vector< uint_t > numReceivedPrimitivesPerRank( numProcesses, 0 );
// Set every primitive to an invalid rank to allow counting primitives per rank later on.
for ( const auto& it : storage.getPrimitiveIDs() )
{
migrationMap[it.getID()] = -1;
}
// Heuristic: let's assign primitives with consecutive IDs to the same process.
// Often, meshes are generated in a way that primitives with consecutive IDs are located next to each other.
auto setup_volume_ids = [&]( const auto& ids ) {
auto it = ids.begin();
for ( uint_t currentRank = 0; currentRank < numReceivingProcesses; currentRank++ )
{
uint_t numVolumesOnThisRank = 0;
while ( numVolumesOnThisRank < ids.size() / numReceivingProcesses )
{
migrationMap[it->getID()] = minRank + currentRank;
numVolumesOnThisRank++;
it++;
}
if ( currentRank < ids.size() % numReceivingProcesses )
{
migrationMap[it->getID()] = minRank + currentRank;
it++;
}
numPrimitivesPerRank[minRank + currentRank] =
ids.size() / numReceivingProcesses + ( currentRank < ids.size() % numReceivingProcesses );
}
WALBERLA_CHECK( it == ids.end() );
};
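// In 3D the volume primitives are the cells, in 2D they are the faces.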
if ( storage.hasGlobalCells() )
{
setup_volume_ids( storage.getCellIDs() );
}
else
{
setup_volume_ids( storage.getFaceIDs() );
}
// We assign each lower-dimensional primitive to the rank of one of its higher-dimensional neighbors.
// To equalize the weights a little, we choose the neighbor whose rank currently holds the fewest primitives.
auto getHigherDimNeighbors = []( const auto& primitive, bool is_face ) {
if ( is_face )
{
return primitive->neighborCells();
}
else
{
return primitive->getHigherDimNeighbors();
}
};
auto setup_lower_dim_ids = [&]( const auto& primitives, bool is_face ) {
for ( const auto& it : primitives )
{
const auto pID = it.first;
const auto primitive = it.second;
std::map< uint_t, uint_t > rankPrimitiveCount;
for ( const auto& neighborPrimitive : getHigherDimNeighbors( primitive, is_face ) )
{
if ( storage.getPrimitiveRank( neighborPrimitive.getID() ) == rank )
{
const auto neighborRank = migrationMap[neighborPrimitive.getID()];
rankPrimitiveCount[neighborRank] = numPrimitivesPerRank[neighborRank];
}
}
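// Pick the target rank, among the local volume neighbors, that currently holds the fewest primitives.
// Note: this assumes that at least one higher-dimensional neighbor is stored locally, so rankPrimitiveCount is not empty.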
auto leastFullNBRank = std::min_element( rankPrimitiveCount.begin(),
rankPrimitiveCount.end(),
[]( std::pair< uint_t, uint_t > a, std::pair< uint_t, uint_t > b ) {
return a.second < b.second;
} )
->first;
migrationMap[pID] = leastFullNBRank; // leastFullNBRank is already an absolute rank (minRank was added when assigning the volume primitives)
numPrimitivesPerRank[leastFullNBRank]++;
}
};
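// In 3D, faces are lower-dimensional primitives as well; in 2D they are the volume primitives and were handled above.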
if ( storage.hasGlobalCells() )
{
setup_lower_dim_ids( storage.getFaces(), true );
}
setup_lower_dim_ids( storage.getEdges(), false );
setup_lower_dim_ids( storage.getVertices(), false );
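// The MigrationInfo needs to know how many primitives this rank will receive. Only the senders know
// their send counts, so the per-rank counts are exchanged via MPI_Alltoall (the all-to-all communication
// mentioned in the commit message) and summed up on the receiving side.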
auto communicate_receiving_primitives = [&]() {
auto mpi_type = walberla::MPITrait< uint_t >::type();
auto comm = walberla::mpi::MPIManager::instance()->comm();
std::fill( numReceivedPrimitivesPerRank.begin(), numReceivedPrimitivesPerRank.end(), 0 );
MPI_Alltoall( numPrimitivesPerRank.data(), 1, mpi_type, numReceivedPrimitivesPerRank.data(), 1, mpi_type, comm );
return std::accumulate( numReceivedPrimitivesPerRank.begin(), numReceivedPrimitivesPerRank.end(), uint_t{ 0 } );
};
numReceivingPrimitives = communicate_receiving_primitives();
MigrationInfo migrationInfo( migrationMap, uint_c( numReceivingPrimitives ) );
storage.migratePrimitives( migrationInfo );
return migrationInfo;
}
MigrationInfo reverseDistribution( const MigrationInfo& originalMigrationInfo, PrimitiveStorage& storageToRedistribute )
{
MigrationInfo migrationInfo = reverseDistributionDry( originalMigrationInfo );
@@ -50,6 +50,8 @@ MigrationInfo roundRobin( PrimitiveStorage & storage, uint_t numberOfTargetProce
/// \param maxRank highest included rank in the distribution
MigrationInfo roundRobin( PrimitiveStorage & storage, uint_t minRank, uint_t maxRank );
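/// \brief Distributes the volume primitives (cells in 3D, faces in 2D) round robin to the ranks in [minRank, maxRank]
///        and assigns each lower-dimensional primitive to the rank of one of its higher-dimensional neighbors.
///
/// \param storage the PrimitiveStorage, the primitives are distributed on
/// \param minRank lowest included rank in the distribution
/// \param maxRank highest included rank in the distribution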
MigrationInfo roundRobinVolume( PrimitiveStorage & storage, uint_t minRank, uint_t maxRank );
/// \brief Performs a round robin distribution to a subset of processes in parallel.
///
/// \param storage the PrimitiveStorage, the primitives are distributed on
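For reference, a hedged usage sketch of the new function. The namespace hyteg::loadbalancing::distributed, the include paths, and the mesh file name are assumptions based on the surrounding HyTeG/waLBerla code and are not confirmed by this commit.

#include "core/DataTypes.h"
#include "core/mpi/MPIManager.h"

#include "hyteg/mesh/MeshInfo.hpp"
#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
#include "hyteg/primitivestorage/SetupPrimitiveStorage.hpp"
#include "hyteg/primitivestorage/loadbalancing/DistributedBalancer.hpp"

int main( int argc, char** argv )
{
   walberla::MPIManager::instance()->initializeMPI( &argc, &argv );
   walberla::MPIManager::instance()->useWorldComm();

   // Build a distributed storage from a mesh file (the file name is a placeholder).
   const hyteg::MeshInfo meshInfo = hyteg::MeshInfo::fromGmshFile( "mesh.msh" );
   hyteg::SetupPrimitiveStorage setupStorage( meshInfo,
                                              walberla::uint_c( walberla::MPIManager::instance()->numProcesses() ) );
   hyteg::PrimitiveStorage storage( setupStorage );

   // Redistribute the volume primitives round robin onto the ranks [0, numProcesses - 1];
   // lower-dimensional primitives follow their volume neighbors.
   const walberla::uint_t maxRank = walberla::uint_c( walberla::MPIManager::instance()->numProcesses() ) - 1;
   hyteg::loadbalancing::distributed::roundRobinVolume( storage, 0, maxRank );

   return 0;
}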