Commit ea056823 authored by Christoph Schwarzmeier
Improve OpenMPI bugfix

parent e304491c
@@ -263,19 +263,28 @@ void createSetupBlockForest( blockforest::SetupBlockForest & sforest, const Conf
    WALBERLA_MPI_SECTION()
    {
-      MPIManager::instance()->createCartesianComm( numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, false, false, false );
-      processIdMap = make_shared< std::vector<uint_t> >( numberOfXProcesses * numberOfYProcesses * numberOfZProcesses );
-      for( uint_t z = 0; z != numberOfZProcesses; ++z ) {
-         for( uint_t y = 0; y != numberOfYProcesses; ++y ) {
-            for( uint_t x = 0; x != numberOfXProcesses; ++x )
-            {
-               (*processIdMap)[z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x] =
-                  uint_c( MPIManager::instance()->cartesianRank( x, y, z ) );
-            }
-         }
-      }
+      if ( ! MPIManager::instance()->isCartesianCommValid() )
+      {
+         MPIManager::instance()->createCartesianComm(numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, false, false, false);
+         processIdMap = make_shared<std::vector<uint_t> >(numberOfXProcesses * numberOfYProcesses * numberOfZProcesses);
+         for (uint_t z = 0; z != numberOfZProcesses; ++z) {
+            for (uint_t y = 0; y != numberOfYProcesses; ++y) {
+               for (uint_t x = 0; x != numberOfXProcesses; ++x)
+               {
+                  (*processIdMap)[z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x] =
+                     uint_c(MPIManager::instance()->cartesianRank(x, y, z));
+               }
+            }
+         }
+      }
+      else {
+         WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI contains a bug. See waLBerla issue #73 for more "
+                                       "information. As a workaround, MPI_COMM_WORLD instead of a "
+                                       "Cartesian MPI communicator is used." );
+         MPIManager::instance()->useWorldComm();
+      }
    }
    else
       MPIManager::instance()->useWorldComm();
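The pattern introduced by this hunk is a common MPI idiom: only build a Cartesian topology when it is known to be safe, and otherwise keep all ranks on MPI_COMM_WORLD. A minimal stand-alone sketch of that idiom in plain MPI (the useWorldCommWorkaround() predicate is a hypothetical stand-in for waLBerla's OpenMPI version check) might look like this:

#include <mpi.h>
#include <cstdio>

// Hypothetical stand-in for the version check; waLBerla bases this decision on
// the detected OpenMPI release (see waLBerla issue #73).
static bool useWorldCommWorkaround() { return false; }

int main(int argc, char** argv)
{
   MPI_Init(&argc, &argv);

   MPI_Comm comm = MPI_COMM_WORLD;
   if (!useWorldCommWorkaround())
   {
      int dims[3]    = {0, 0, 0};   // let MPI choose a 3D process grid
      int periods[3] = {0, 0, 0};   // non-periodic in all directions
      int nproc      = 0;
      MPI_Comm_size(MPI_COMM_WORLD, &nproc);
      MPI_Dims_create(nproc, 3, dims);
      // reorder = 1 allows MPI to renumber ranks to fit the topology
      MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 1, &comm);
   }
   // else: stay on MPI_COMM_WORLD, analogous to MPIManager::useWorldComm()

   int rank = -1;
   MPI_Comm_rank(comm, &rank);
   std::printf("rank %d\n", rank);

   if (comm != MPI_COMM_WORLD) MPI_Comm_free(&comm);
   MPI_Finalize();
   return 0;
}

waLBerla wraps the same decision in MPIManager::createCartesianComm() and MPIManager::useWorldComm(), as the hunk above shows.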
@@ -229,19 +229,27 @@ createBlockForest( const AABB& domainAABB,
    //create cartesian communicator only if not yet a cartesian communicator (or other communicator was created)
    if ( ! mpiManager->rankValid() )
    {
-      mpiManager->createCartesianComm( numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, xPeriodic, yPeriodic, zPeriodic );
-      processIdMap = new std::vector< uint_t >( numberOfProcesses );
-      for( uint_t z = 0; z != numberOfZProcesses; ++z ) {
-         for( uint_t y = 0; y != numberOfYProcesses; ++y ) {
-            for( uint_t x = 0; x != numberOfXProcesses; ++x ) {
-               (*processIdMap)[ z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x ] =
-                  uint_c( MPIManager::instance()->cartesianRank(x,y,z) );
-            }
-         }
-      }
+      if ( ! mpiManager->isCartesianCommValid() ) {
+         mpiManager->createCartesianComm( numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, xPeriodic, yPeriodic, zPeriodic );
+         processIdMap = new std::vector< uint_t >( numberOfProcesses );
+         for( uint_t z = 0; z != numberOfZProcesses; ++z ) {
+            for( uint_t y = 0; y != numberOfYProcesses; ++y ) {
+               for( uint_t x = 0; x != numberOfXProcesses; ++x ) {
+                  (*processIdMap)[ z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x ] =
+                     uint_c( MPIManager::instance()->cartesianRank(x,y,z) );
+               }
+            }
+         }
+      }
+      else {
+         WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI contains a bug. See waLBerla issue #73 for more "
+                                       "information. As a workaround, MPI_COMM_WORLD instead of a "
+                                       "Cartesian MPI communicator is used." );
+         mpiManager->useWorldComm();
+      }
    }
 }
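As in the previous hunk, the processIdMap built here records, for every (x, y, z) position of the process grid, the rank that the Cartesian communicator assigned to that position. A rough stand-alone equivalent in plain MPI, with MPI_Cart_rank playing the role of waLBerla's MPIManager::cartesianRank(), could be sketched as follows (it assumes the communicator was created with the same x, y, z ordering of dimensions):

#include <mpi.h>
#include <cstddef>
#include <vector>

// 'cart' is assumed to be a 3D Cartesian communicator created with MPI_Cart_create;
// px, py, pz are the number of processes in the x, y and z directions.
std::vector<int> buildProcessIdMap(MPI_Comm cart, int px, int py, int pz)
{
   std::vector<int> processIdMap(static_cast<std::size_t>(px) * py * pz);
   for (int z = 0; z < pz; ++z) {
      for (int y = 0; y < py; ++y) {
         for (int x = 0; x < px; ++x) {
            int coords[3] = {x, y, z};
            int rank      = -1;
            MPI_Cart_rank(cart, coords, &rank);   // rank owning grid position (x, y, z)
            processIdMap[static_cast<std::size_t>(z) * px * py +
                         static_cast<std::size_t>(y) * px + x] = rank;
         }
      }
   }
   return processIdMap;
}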
@@ -109,22 +109,6 @@ void MPIManager::initializeMPI( int* argc, char*** argv, bool abortOnException )
    MPI_Comm_size( MPI_COMM_WORLD, &numProcesses_ );
    MPI_Comm_rank( MPI_COMM_WORLD, &worldRank_ );
-   // Avoid using the Cartesian MPI-communicator with certain versions of OpenMPI (see waLBerla issue #73)
-#if defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && defined(OMPI_RELEASE_VERSION)
-   std::string ompi_ver = std::to_string(OMPI_MAJOR_VERSION) + "." + std::to_string(OMPI_MINOR_VERSION) + "." +
-                          std::to_string(OMPI_RELEASE_VERSION);
-   if (ompi_ver == "2.0.0" || ompi_ver == "2.0.1" || ompi_ver == "2.0.2" || ompi_ver == "2.0.3" ||
-       ompi_ver == "2.1.0" || ompi_ver == "2.1.1") {
-      useWorldComm();
-      WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI (" << ompi_ver << ") contains a bug. See waLBerla "
-                                    "issue #73 for more information. Using MPI_COMM_WORLD instead of a Cartesian "
-                                    "MPI communicator as a workaround." );
-   }
-#endif
    if( abortOnException )
       std::set_terminate( customTerminateHandler );
 }
@@ -174,12 +158,18 @@ void MPIManager::createCartesianComm( int dims[3], int periodicity[3] )
    WALBERLA_ASSERT_GREATER( dims[1], 0 );
    WALBERLA_ASSERT_GREATER( dims[2], 0 );
-   MPI_Cart_create( MPI_COMM_WORLD, 3, dims, periodicity, true, &comm_ );
-   WALBERLA_ASSERT_UNEQUAL(comm_, MPI_COMM_NULL);
+   if ( isCartesianCommValid() ) {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI contains a bug which might lead to a segmentation fault "
+                                    "when generating vtk output. Since the bug only occurs with a 3D Cartesian MPI "
+                                    "communicator, try to use MPI_COMM_WORLD instead. See waLBerla issue #73 for "
+                                    "additional information." );
+   }
+   MPI_Cart_create( MPI_COMM_WORLD, 3, dims, periodicity, true, &comm_ );
    MPI_Comm_rank( comm_, &rank_ );
    cartesianSetup_ = true;
+   WALBERLA_ASSERT_UNEQUAL(comm_, MPI_COMM_NULL);
 }
@@ -243,6 +233,25 @@ int MPIManager::cartesianRank( const uint_t x, const uint_t y, const uint_t z )
    return cartesianRank( coords );
 }
+bool MPIManager::isCartesianCommValid() const
+{
+   // Avoid using the Cartesian MPI-communicator with certain (buggy) versions of OpenMPI (see waLBerla issue #73)
+#if defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && defined(OMPI_RELEASE_VERSION)
+   std::string ompi_ver = std::to_string(OMPI_MAJOR_VERSION) + "." + std::to_string(OMPI_MINOR_VERSION) + "." +
+                          std::to_string(OMPI_RELEASE_VERSION);
+   if (ompi_ver == "2.0.0" || ompi_ver == "2.0.1" || ompi_ver == "2.0.2" || ompi_ver == "2.0.3" ||
+       ompi_ver == "2.1.0" || ompi_ver == "2.1.1") {
+      return true;
+   }
+   else {
+      return false;
+   }
+#else
+   return false;
+#endif
+}
 std::string MPIManager::getMPIErrorString(int errorCode)
 {
    WALBERLA_NON_MPI_SECTION()
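The new isCartesianCommValid() method relies on the OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION and OMPI_RELEASE_VERSION macros that OpenMPI's mpi.h defines; other MPI implementations do not provide them, which is what the #else branch covers. Note that, as written, the method returns true exactly for the affected OpenMPI releases, which is why the callers above test for ! isCartesianCommValid() before creating the Cartesian communicator. A small self-contained sketch of the same detection, under a hypothetical name that spells out that meaning, might be:

#include <mpi.h>
#include <iostream>
#include <string>

// Returns true if the detected OpenMPI release is one of those affected by the
// Cartesian-communicator bug tracked as waLBerla issue #73.
bool openMpiNeedsWorldCommWorkaround()
{
#if defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && defined(OMPI_RELEASE_VERSION)
   const std::string ver = std::to_string(OMPI_MAJOR_VERSION) + "." +
                           std::to_string(OMPI_MINOR_VERSION) + "." +
                           std::to_string(OMPI_RELEASE_VERSION);
   return ver == "2.0.0" || ver == "2.0.1" || ver == "2.0.2" || ver == "2.0.3" ||
          ver == "2.1.0" || ver == "2.1.1";
#else
   return false;   // not OpenMPI (or version macros unavailable): no workaround needed
#endif
}

int main()
{
   std::cout << (openMpiNeedsWorldCommWorkaround() ? "workaround needed\n"
                                                   : "Cartesian communicator is fine\n");
   return 0;
}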
@@ -122,6 +122,9 @@ public:
    bool hasCartesianSetup() const { return cartesianSetup_; }
    /// Rank is valid after calling createCartesianComm() or useWorldComm()
    bool rankValid() const { return rank_ >= 0; }
+   /// Using a Cartesian MPI communicator is not valid for certain versions of OpenMPI (see waLBerla issue #73)
+   bool isCartesianCommValid() const;
    //@}
    //*******************************************************************************************************************