From ea056823db7a1f3b0cd281ffb630ff2fa33c7d44 Mon Sep 17 00:00:00 2001
From: Christoph Schwarzmeier <christoph.schwarzmeier@fau.de>
Date: Fri, 21 Dec 2018 14:24:45 +0100
Subject: [PATCH] Improve OpenMPI bugfix

Move the OpenMPI version check that was hard-coded in
MPIManager::initializeMPI() into the new query
MPIManager::isCartesianCommValid() and evaluate it at the call sites that
set up a Cartesian communicator. On the affected OpenMPI releases
(2.0.0-2.0.3, 2.1.0, 2.1.1) the code now falls back to MPI_COMM_WORLD and
logs a warning (see waLBerla issue #73).

---
 apps/benchmarks/UniformGrid/UniformGrid.cpp | 25 +++++++----
 src/blockforest/Initialization.cpp          | 22 +++++++---
 src/core/mpi/MPIManager.cpp                 | 42 ++++++++++---------
 src/core/mpi/MPIManager.h                   |  3 ++
 4 files changed, 58 insertions(+), 34 deletions(-)

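Note for reviewers: the version check previously hard-coded in
MPIManager::initializeMPI() is now exposed as MPIManager::isCartesianCommValid(),
which returns true on the OpenMPI releases affected by issue #73, and the call
sites that set up a Cartesian communicator consult it before calling
MPI_Cart_create(). A rough standalone sketch of the detection idiom follows;
the helper name openMPIVersionIsAffected is hypothetical, and the sketch
assumes only the OMPI_*_VERSION macros that OpenMPI's <mpi.h> provides:

   #include <mpi.h>
   #include <string>

   // Hypothetical helper mirroring MPIManager::isCartesianCommValid() from
   // this patch: true when the OpenMPI release in use may segfault with a
   // 3D Cartesian communicator (waLBerla issue #73).
   static bool openMPIVersionIsAffected()
   {
   #if defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && defined(OMPI_RELEASE_VERSION)
      const std::string ver = std::to_string(OMPI_MAJOR_VERSION) + "." +
                              std::to_string(OMPI_MINOR_VERSION) + "." +
                              std::to_string(OMPI_RELEASE_VERSION);
      return ver == "2.0.0" || ver == "2.0.1" || ver == "2.0.2" ||
             ver == "2.0.3" || ver == "2.1.0" || ver == "2.1.1";
   #else
      return false; // not OpenMPI, or version macros unavailable
   #endif
   }
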
diff --git a/apps/benchmarks/UniformGrid/UniformGrid.cpp b/apps/benchmarks/UniformGrid/UniformGrid.cpp
index c270b38ca..62fbb5b34 100644
--- a/apps/benchmarks/UniformGrid/UniformGrid.cpp
+++ b/apps/benchmarks/UniformGrid/UniformGrid.cpp
@@ -263,19 +263,28 @@ void createSetupBlockForest( blockforest::SetupBlockForest & sforest, const Conf
 
       WALBERLA_MPI_SECTION()
       {
-         MPIManager::instance()->createCartesianComm( numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, false, false, false );
+         if ( ! MPIManager::instance()->isCartesianCommValid() )
+         {
+            MPIManager::instance()->createCartesianComm(numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, false, false, false);
 
-         processIdMap = make_shared< std::vector<uint_t> >( numberOfXProcesses * numberOfYProcesses * numberOfZProcesses );
+            processIdMap = make_shared<std::vector<uint_t> >(numberOfXProcesses * numberOfYProcesses * numberOfZProcesses);
 
-         for( uint_t z = 0; z != numberOfZProcesses; ++z ) {
-            for( uint_t y = 0; y != numberOfYProcesses; ++y ) {
-               for( uint_t x = 0; x != numberOfXProcesses; ++x )
-               {
-                  (*processIdMap)[z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x] =
-                     uint_c( MPIManager::instance()->cartesianRank( x, y, z ) );
+            for (uint_t z = 0; z != numberOfZProcesses; ++z) {
+               for (uint_t y = 0; y != numberOfYProcesses; ++y) {
+                  for (uint_t x = 0; x != numberOfXProcesses; ++x)
+                  {
+                     (*processIdMap)[z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x] =
+                           uint_c(MPIManager::instance()->cartesianRank(x, y, z));
+                  }
                }
             }
          }
+         else {
+            WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI contains a bug. See waLBerla issue #73 for more "
+                                          "information. As a workaround, MPI_COMM_WORLD instead of a "
+                                          "Cartesian MPI communicator is used." );
+            MPIManager::instance()->useWorldComm();
+         }
       }
       else
          MPIManager::instance()->useWorldComm();
diff --git a/src/blockforest/Initialization.cpp b/src/blockforest/Initialization.cpp
index acb638acc..d05b9521b 100644
--- a/src/blockforest/Initialization.cpp
+++ b/src/blockforest/Initialization.cpp
@@ -229,19 +229,27 @@ createBlockForest(      const AABB& domainAABB,
       //create cartesian communicator only if not yet a cartesian communicator (or other communicator was created)
       if ( ! mpiManager->rankValid() )
       {
-         mpiManager->createCartesianComm( numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, xPeriodic, yPeriodic, zPeriodic );
+         if ( ! mpiManager->isCartesianCommValid() ) {
+            mpiManager->createCartesianComm( numberOfXProcesses, numberOfYProcesses, numberOfZProcesses, xPeriodic, yPeriodic, zPeriodic );
 
-         processIdMap = new std::vector< uint_t >( numberOfProcesses );
+            processIdMap = new std::vector< uint_t >( numberOfProcesses );
 
-         for( uint_t z = 0; z != numberOfZProcesses; ++z ) {
-            for( uint_t y = 0; y != numberOfYProcesses; ++y ) {
-               for( uint_t x = 0; x != numberOfXProcesses; ++x ) {
+            for( uint_t z = 0; z != numberOfZProcesses; ++z ) {
+               for( uint_t y = 0; y != numberOfYProcesses; ++y ) {
+                  for( uint_t x = 0; x != numberOfXProcesses; ++x ) {
 
-                  (*processIdMap)[ z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x ] =
-                        uint_c( MPIManager::instance()->cartesianRank(x,y,z) );
+                     (*processIdMap)[ z * numberOfXProcesses * numberOfYProcesses + y * numberOfXProcesses + x ] =
+                           uint_c( MPIManager::instance()->cartesianRank(x,y,z) );
+                  }
                }
             }
          }
+         else {
+            WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI contains a bug. See waLBerla issue #73 for more "
+                                          "information. As a workaround, MPI_COMM_WORLD instead of a "
+                                          "Cartesian MPI communicator is used." );
+            mpiManager->useWorldComm();
+         }
       }
    }
 
diff --git a/src/core/mpi/MPIManager.cpp b/src/core/mpi/MPIManager.cpp
index b855a67c7..b3705407b 100644
--- a/src/core/mpi/MPIManager.cpp
+++ b/src/core/mpi/MPIManager.cpp
@@ -109,22 +109,6 @@ void MPIManager::initializeMPI( int* argc, char*** argv, bool abortOnException )
       MPI_Comm_size( MPI_COMM_WORLD, &numProcesses_ );
       MPI_Comm_rank( MPI_COMM_WORLD, &worldRank_ );
 
-      // Avoid using the Cartesian MPI-communicator with certain versions of OpenMPI (see waLBerla issue #73)
-      #if defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && defined(OMPI_RELEASE_VERSION)
-         std::string ompi_ver = std::to_string(OMPI_MAJOR_VERSION) + "." + std::to_string(OMPI_MINOR_VERSION) + "." +
-                                std::to_string(OMPI_RELEASE_VERSION);
-
-         if (ompi_ver == "2.0.0" || ompi_ver == "2.0.1" || ompi_ver == "2.0.2" || ompi_ver == "2.0.3" ||
-             ompi_ver == "2.1.0" || ompi_ver == "2.1.1") {
-
-            useWorldComm();
-
-            WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI (" << ompi_ver << ") contains a bug. See waLBerla "
-                                         "issue #73 for more information. Using MPI_COMM_WORLD instead of a Cartesian "
-                                         "MPI communicator as a workaround." );
-         }
-      #endif
-
       if( abortOnException )
          std::set_terminate( customTerminateHandler );
    }
@@ -174,12 +158,18 @@ void MPIManager::createCartesianComm( int dims[3], int periodicity[3] )
    WALBERLA_ASSERT_GREATER( dims[1], 0 );
    WALBERLA_ASSERT_GREATER( dims[2], 0 );
 
-   MPI_Cart_create( MPI_COMM_WORLD, 3, dims, periodicity, true, &comm_ );
-
-   WALBERLA_ASSERT_UNEQUAL(comm_, MPI_COMM_NULL);
+   if ( isCartesianCommValid() ) { // true on OpenMPI releases affected by issue #73
+      WALBERLA_LOG_WARNING_ON_ROOT( "Your version of OpenMPI contains a bug which can lead to a segmentation fault "
+                                    "when generating VTK output. Since the bug only occurs with a 3D Cartesian MPI "
+                                    "communicator, consider using MPI_COMM_WORLD instead. See waLBerla issue #73 for "
+                                    "additional information." );
+   }
 
+   MPI_Cart_create( MPI_COMM_WORLD, 3, dims, periodicity, true, &comm_ );
    MPI_Comm_rank( comm_, &rank_ );
    cartesianSetup_ = true;
+
+   WALBERLA_ASSERT_UNEQUAL(comm_, MPI_COMM_NULL);
 }
 
 
@@ -243,6 +233,20 @@ int MPIManager::cartesianRank( const uint_t x, const uint_t y, const uint_t z )
    return cartesianRank( coords );
 }
 
+bool MPIManager::isCartesianCommValid() const
+{
+   // Avoid using the Cartesian MPI-communicator with certain (buggy) versions of OpenMPI (see waLBerla issue #73)
+   #if defined(OMPI_MAJOR_VERSION) && defined(OMPI_MINOR_VERSION) && defined(OMPI_RELEASE_VERSION)
+      std::string ompi_ver = std::to_string(OMPI_MAJOR_VERSION) + "." + std::to_string(OMPI_MINOR_VERSION) + "." +
+            std::to_string(OMPI_RELEASE_VERSION);
+
+      return ompi_ver == "2.0.0" || ompi_ver == "2.0.1" || ompi_ver == "2.0.2" ||
+             ompi_ver == "2.0.3" || ompi_ver == "2.1.0" || ompi_ver == "2.1.1";
+   #else
+      return false;
+   #endif
+}
+
 std::string MPIManager::getMPIErrorString(int errorCode)
 {
    WALBERLA_NON_MPI_SECTION()
diff --git a/src/core/mpi/MPIManager.h b/src/core/mpi/MPIManager.h
index fe041c267..fdbc3e06e 100644
--- a/src/core/mpi/MPIManager.h
+++ b/src/core/mpi/MPIManager.h
@@ -122,6 +122,9 @@ public:
    bool hasCartesianSetup() const { return cartesianSetup_;  }
    /// Rank is valid after calling createCartesianComm() or useWorldComm()
    bool rankValid()         const { return rank_ >= 0;       }
+
+   /// Returns true if the OpenMPI version in use suffers from the Cartesian communicator bug described in waLBerla issue #73, i.e. if a Cartesian MPI communicator must be avoided
+   bool isCartesianCommValid() const;
    //@}
    //*******************************************************************************************************************
 
-- 
GitLab
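
Reviewer note: after this patch, communicator setup follows the pattern below.
This is a minimal sketch, not part of the patch: it uses only names that
appear above (MPIManager, isCartesianCommValid(), createCartesianComm(),
useWorldComm()); the function setupCommunicator and the process counts
px/py/pz are placeholders, and the include path assumes the src/ layout shown
in the diffstat.

   #include "core/mpi/MPIManager.h" // assumed include path (src/core/mpi/MPIManager.h)

   using walberla::MPIManager;
   using walberla::uint_t;

   // Placeholder wrapper; assumes MPIManager::initializeMPI() has already run.
   void setupCommunicator( const uint_t px, const uint_t py, const uint_t pz )
   {
      auto mpi = MPIManager::instance();
      if ( ! mpi->isCartesianCommValid() )
      {
         // OpenMPI release is unaffected: build the 3D Cartesian communicator
         // (non-periodic in all three dimensions here).
         mpi->createCartesianComm( px, py, pz, false, false, false );
      }
      else
      {
         // Affected OpenMPI release (issue #73): fall back to MPI_COMM_WORLD.
         mpi->useWorldComm();
      }
   }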