From b41f9709c117a76f87cbe9967c9bbe53a94891b3 Mon Sep 17 00:00:00 2001
From: Sebastian Eibl <sebastian.eibl@fau.de>
Date: Fri, 2 Mar 2018 14:07:07 +0100
Subject: [PATCH] Extend consistency checks in DynamicParMetis

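Summary of changes:

* Run ParMETIS on a sub-communicator that excludes processes without
  blocks and verify rank/size consistency on that communicator.
* Build the graph vertices from the targetProcess vector (in the same
  order as the result vector "part") instead of the phantom block map,
  and pass the number of local blocks to getBlockSequenceRange directly.
* Validate the distributed CSR graph (vtxdist, xadj, adjncy, vwgt,
  vsize, xyz) with WALBERLA_CHECK before calling ParMETIS.
* Default missing edge weights to 1 instead of 0 and add the missing
  break in the PARMETIS_EDGES_FROM_EDGE_WEIGHTS case.
* Add the PARMETIS_PART_GEOM algorithm ("PART_GEOM"), backed by
  ParMETIS_V3_PartGeom.
* Replace the raw ubvec/options arrays with std::vector.
* translateRank: return early when source and destination communicator
  are identical and map MPI_UNDEFINED to -1 explicitly.

For reference, a minimal, self-contained C++ sketch (not part of the
patch, illustrative values only) of the distributed CSR layout whose
invariants the new checks verify:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main()
    {
       // rank 0 owns global vertices [0,2), rank 1 owns [2,4)
       const std::vector<int64_t> vtxdist = { 0, 2, 4 };

       // local view of rank 0 for the chain 0-1-2-3
       const std::vector<int64_t> xadj   = { 0, 1, 3 }; // prefix sums of the adjacency list sizes
       const std::vector<int64_t> adjncy = { 1, 0, 2 }; // 0->1, 1->0, 1->2 (global vertex ids)
       const std::vector<int64_t> adjwgt = { 1, 1, 1 }; // one weight per adjacency entry

       // the invariants checked in DynamicParMetis::operator()
       assert( xadj.front() == 0 );
       assert( xadj.back()  == static_cast<int64_t>( adjncy.size() ) );
       assert( adjwgt.size() == adjncy.size() );
       for( std::size_t i = 1; i < vtxdist.size(); ++i )
          assert( vtxdist[i-1] < vtxdist[i] );
       return 0;
    }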
---
 .../loadbalancing/DynamicParMetis.cpp         | 121 +++++++++++++-----
 .../loadbalancing/DynamicParMetis.h           |   2 +-
 src/core/mpi/MPIHelper.cpp                    |  26 ++--
 3 files changed, 104 insertions(+), 45 deletions(-)

diff --git a/src/blockforest/loadbalancing/DynamicParMetis.cpp b/src/blockforest/loadbalancing/DynamicParMetis.cpp
index ec2b38ff3..0ec01197d 100644
--- a/src/blockforest/loadbalancing/DynamicParMetis.cpp
+++ b/src/blockforest/loadbalancing/DynamicParMetis.cpp
@@ -37,10 +37,13 @@
 #include <boost/algorithm/string/case_conv.hpp>
 #include <boost/algorithm/string/trim.hpp>
 
+#include <array>
+#include <vector>
+
 namespace walberla {
 namespace blockforest {
 
-std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phantomForest, MPI_Comm comm )
+std::pair<uint_t, uint_t> getBlockSequenceRange( uint_t numLocalBlocks, MPI_Comm comm )
 {
    const uint_t rank = uint_c(mpi::translateRank(mpi::MPIManager::instance()->comm(), comm, MPIManager::instance()->rank()));
    WALBERLA_DEBUG_SECTION()
@@ -50,8 +53,6 @@ std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phan
       WALBERLA_ASSERT_EQUAL(rank, rankRaw);
    }
 
-   uint_t numLocalBlocks = phantomForest.getNumberOfBlocks();
-
    size_t sequenceStartOnProcess = 0;
    MPI_Exscan( &numLocalBlocks, &sequenceStartOnProcess, 1, MPITrait<uint_t>::type(), MPI_SUM, comm );
    if( rank == 0 )
@@ -60,14 +61,13 @@ std::pair<uint_t, uint_t> getBlockSequenceRange( const PhantomBlockForest & phan
    return std::make_pair( sequenceStartOnProcess, sequenceStartOnProcess + numLocalBlocks );
 }
 
-std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest & phantomForest, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm )
+std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const PhantomBlockForest& phantomForest, const std::vector< std::pair< const PhantomBlock *, uint_t > > & targetProcess, const std::pair<uint_t, uint_t> & blockSequenceRange, MPI_Comm comm )
 {
    std::map< blockforest::BlockID, uint_t > mapping;
 
-   const auto & blockMap = phantomForest.getBlockMap();
    uint_t sequenceId = blockSequenceRange.first;
-   for( auto it = blockMap.begin(); it != blockMap.end(); ++it )
-      mapping.insert( std::make_pair( it->first, sequenceId++ ) );
+   for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it )
+      mapping.insert( std::make_pair( it->first->getId(), sequenceId++ ) );
    WALBERLA_ASSERT_EQUAL( sequenceId, blockSequenceRange.second );
 
    const std::vector<uint_t> neighborProcesses = phantomForest.getNeighboringProcesses();
@@ -124,11 +124,11 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
    globalTimer.start();
 
    //create new communicator which excludes processes which do not have blocks
-   MPI_Comm subComm = MPIManager::instance()->comm();
+   MPI_Comm subComm = MPI_COMM_NULL;
    MPI_Group allGroup, subGroup;
    MPI_Comm_group( MPIManager::instance()->comm(), &allGroup );
    std::vector<int> ranks;
-   if (phantomForest.getNumberOfBlocks() > 0)
+   if (targetProcess.size() > 0)
       ranks.push_back( MPIManager::instance()->rank() );
    ranks = mpi::allGatherv( ranks, MPIManager::instance()->comm() );
    auto numSubProcesses = ranks.size();
@@ -136,22 +136,51 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
    MPI_Group_incl(allGroup, int_c(ranks.size()), &ranks[0], &subGroup);
    MPI_Comm_create( MPIManager::instance()->comm(), subGroup, &subComm);
 
+   if( targetProcess.size() != 0 )
+   {
+      int subRank;
+      int subSize;
+      MPI_Comm_rank(subComm, &subRank);
+      MPI_Comm_size(subComm, &subSize);
+      WALBERLA_CHECK_GREATER_EQUAL(subRank, 0);
+      WALBERLA_CHECK_LESS(subRank, subSize);
+   } else
+   {
+      // processes without local blocks are excluded from the sub-communicator;
+      // MPI_Comm_rank must not be called on MPI_COMM_NULL
+      WALBERLA_CHECK_EQUAL(subComm, MPI_COMM_NULL);
+   }
+
    int64_t edgecut = 0;
-   std::vector<int64_t> part( phantomForest.getNumberOfBlocks(), int64_c( MPIManager::instance()->rank() ) );
+   WALBERLA_CHECK_EQUAL( phantomForest.getNumberOfBlocks(), targetProcess.size() );
+   std::vector<int64_t> part( targetProcess.size(), int64_c( MPIManager::instance()->rank() ) );
 
    if (subComm != MPI_COMM_NULL)
    {
-      const std::pair<uint_t, uint_t> blockSequenceRange = getBlockSequenceRange( phantomForest, subComm );
-      const std::map< blockforest::BlockID, uint_t > mapping = getBlockIdToSequenceMapping( phantomForest, blockSequenceRange, subComm ); //blockid to vertex id
+      int subRank;
+      int subSize;
+      MPI_Comm_rank(subComm, &subRank);
+      MPI_Comm_size(subComm, &subSize);
+
+      WALBERLA_CHECK_UNEQUAL(targetProcess.size(), 0);
+      const std::pair<uint_t, uint_t> blockSequenceRange = getBlockSequenceRange( targetProcess.size(), subComm );
+      const std::map< blockforest::BlockID, uint_t > mapping = getBlockIdToSequenceMapping( phantomForest, targetProcess, blockSequenceRange, subComm ); //blockid to vertex id
 
       std::vector<int64_t> vtxdist = mpi::allGather( int64_c( blockSequenceRange.second ), subComm );
       vtxdist.insert( vtxdist.begin(), uint_t( 0 ) );
+      WALBERLA_CHECK_EQUAL( vtxdist.size(), subSize + 1 );
+      for (size_t i = 1; i < vtxdist.size(); ++i)
+      {
+         WALBERLA_ASSERT_LESS( vtxdist[i-1], vtxdist[i] );
+      }
 
       std::vector<int64_t> adjncy, xadj, vsize, vwgt, adjwgt;
       std::vector<double> xyz;
 
-      for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it )
+      uint_t blockIndex = 0;
+      for( auto it = targetProcess.begin(); it != targetProcess.end(); ++it, ++blockIndex )
       {
+         WALBERLA_CHECK_EQUAL(blockIndex, mapping.find(it->first->getId())->second - blockSequenceRange.first);
          xadj.push_back( int64_c( adjncy.size() ) );
          const PhantomBlock & block = *( it->first );
          auto bi = block.getData< DynamicParMetisBlockInfo >();
@@ -163,9 +192,12 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
             {
                auto mapIt = mapping.find( nit->getId() );
                WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" );
+               WALBERLA_CHECK_GREATER_EQUAL( mapIt->second, 0 );
+               WALBERLA_CHECK_LESS( mapIt->second, vtxdist.back() );
                adjncy.push_back( int64_c( mapIt->second ) );
                auto edgeWeightIt = bi.getEdgeWeights().find( nit->getId() );
-               adjwgt.push_back( edgeWeightIt == bi.getEdgeWeights().end() ? int64_t( 0 ) : edgeWeightIt->second );
+               //WALBERLA_CHECK_UNEQUAL( edgeWeightIt->second, 0 ); // TODO: consider issuing a warning for zero edge weights
+               adjwgt.push_back( edgeWeightIt == bi.getEdgeWeights().end() ? int64_t( 1 ) : edgeWeightIt->second );
             }
             break;
          case PARMETIS_EDGES_FROM_EDGE_WEIGHTS:
@@ -173,10 +205,15 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
             {
                auto mapIt = mapping.find( edgeIt->first );
                WALBERLA_ASSERT_UNEQUAL( mapIt, mapping.end(), "BlockId of neighbor is not contained in sequence mapping!" );
+               WALBERLA_CHECK_GREATER_EQUAL( mapIt->second, 0 );
+               WALBERLA_CHECK_LESS( mapIt->second, vtxdist.back() );
                adjncy.push_back( int64_c( mapIt->second ) );
+               //WALBERLA_CHECK_UNEQUAL( edgeIt->second, 0 ); // TODO: consider issuing a warning for zero edge weights
                adjwgt.push_back( edgeIt->second );
             }
+            break;
          }
+         WALBERLA_CHECK_UNEQUAL( bi.getVertexWeight(), 0 );
          vwgt.push_back( bi.getVertexWeight() );
          vsize.push_back( bi.getVertexSize() );
          xyz.push_back( bi.getVertexCoords()[0] );
@@ -185,46 +222,58 @@ bool DynamicParMetis::operator()( std::vector< std::pair< const PhantomBlock *,
       }
       xadj.push_back( int64_c( adjncy.size() ) );
 
-      WALBERLA_ASSERT_EQUAL( vtxdist.size(), numSubProcesses + uint_t( 1 ) );
-      WALBERLA_ASSERT_EQUAL( xadj.size(), phantomForest.getNumberOfBlocks() + 1 );
-      WALBERLA_ASSERT_EQUAL( vwgt.size(), phantomForest.getNumberOfBlocks() );
-      WALBERLA_ASSERT_EQUAL( vsize.size(), phantomForest.getNumberOfBlocks() );
-      WALBERLA_ASSERT_EQUAL( adjncy.size(), adjwgt.size() );
-      WALBERLA_ASSERT_EQUAL( adjwgt.size(), xadj.back() );
-
-      int64_t wgtflag = weightsToUse_;
-      int64_t numflag = 0; // C-style ordering
-      int64_t ncon = 1; // Number of constraints
-      int64_t ndims = 3; // Number of dimensions
-      double  ubvec[] = { real_t( 1.05 ) }; // imbalance tolerance
-      int64_t nparts = int64_c( MPIManager::instance()->numProcesses() ); // number of subdomains
-      auto    ipc2redist = ipc2redist_;
-      MPI_Comm comm = subComm; //MPIManager::instance()->comm();
+      WALBERLA_CHECK_EQUAL( vtxdist.size(), numSubProcesses + uint_t( 1 ) );
+      WALBERLA_CHECK_EQUAL( xadj.size(), targetProcess.size() + 1 );
+      WALBERLA_CHECK_EQUAL( xadj.front(), 0 );
+      WALBERLA_CHECK_EQUAL( xadj.back(), adjncy.size() );
+      for (size_t i = 1; i < xadj.size(); ++i)
+      {
+         WALBERLA_ASSERT_LESS( xadj[i-1], xadj[i] );
+      }
+      WALBERLA_CHECK_EQUAL( vwgt.size(), targetProcess.size() );
+      WALBERLA_CHECK_EQUAL( vsize.size(), targetProcess.size() );
+      WALBERLA_CHECK_EQUAL( xyz.size(), targetProcess.size() * 3 );
+      WALBERLA_CHECK_EQUAL( adjncy.size(), adjwgt.size() );
+      WALBERLA_CHECK_EQUAL( adjwgt.size(), xadj.back() );
+
+      int64_t wgtflag         = weightsToUse_;
+      int64_t numflag         = 0; // C-style ordering
+      int64_t ncon            = 1; // Number of constraints
+      int64_t ndims           = 3; // Number of dimensions
+      std::vector<double>  ubvec( uint_c(ncon), double_c( 1.05 ) ); // imbalance tolerance
+      int64_t nparts          = int64_c( MPIManager::instance()->numProcesses() ); // number of subdomains
+      double    ipc2redist    = double_c(ipc2redist_);
+      MPI_Comm comm           = subComm; //MPIManager::instance()->comm();
       std::vector<double> tpwgts( uint_c(nparts * ncon), 1.0 / double_c( nparts ) ); // vertex weight fraction that is stored in a subdomain
-      int64_t options[] = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) };
+      std::vector<int64_t> options = { int64_t( 1 ), int64_t( 0 ), int64_t( 23 ), int64_t( 1 ) };
 
       int metisResult = core::METIS_OK;
 
       switch( algorithm_ )
       {
+      case PARMETIS_PART_GEOM:
+         parmetisTimer.start();
+         metisResult = core::ParMETIS_V3_PartGeom( ptr( vtxdist ), &ndims, ptr( xyz ), ptr( part ), &comm );
+         parmetisTimer.end();
+         break;
       case PARMETIS_PART_GEOM_KWAY:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_PartGeomKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ndims, ptr( xyz ), &ncon, &nparts, ptr( tpwgts ), ptr( ubvec ), ptr( options ), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       case PARMETIS_PART_KWAY:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_PartKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ptr( ubvec ), ptr( options ), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       case PARMETIS_ADAPTIVE_REPART:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_AdaptiveRepart( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( vsize ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, &ipc2redist, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_AdaptiveRepart( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( vsize ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ptr( ubvec ), &ipc2redist, ptr( options ), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       case PARMETIS_REFINE_KWAY:
          parmetisTimer.start();
-         metisResult = core::ParMETIS_V3_RefineKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ubvec, options, &edgecut, ptr( part ), &comm );
+         metisResult = core::ParMETIS_V3_RefineKway( ptr( vtxdist ), ptr( xadj ), ptr( adjncy ), ptr( vwgt ), ptr( adjwgt ), &wgtflag, &numflag, &ncon, &nparts, ptr( tpwgts ), ptr( ubvec ), ptr( options ), &edgecut, ptr( part ), &comm );
          parmetisTimer.end();
          break;
       }
@@ -279,6 +328,8 @@ DynamicParMetis::Algorithm DynamicParMetis::stringToAlgorithm( std::string s )
 
    if( s == "PART_GEOM_KWAY" )
       return PARMETIS_PART_GEOM_KWAY;
+   else if( s == "PART_GEOM" )
+      return PARMETIS_PART_GEOM;
    else if( s == "PART_KWAY" )
       return PARMETIS_PART_KWAY;
    else if( s == "PART_ADAPTIVE_REPART" )
@@ -328,6 +379,8 @@ std::string DynamicParMetis::algorithmToString( ) const
    {
    case walberla::blockforest::DynamicParMetis::PARMETIS_PART_GEOM_KWAY:
       return "PART_GEOM_KWAY";
+   case walberla::blockforest::DynamicParMetis::PARMETIS_PART_GEOM:
+      return "PART_GEOM";
    case walberla::blockforest::DynamicParMetis::PARMETIS_PART_KWAY:
       return "PART_KWAY";
    case walberla::blockforest::DynamicParMetis::PARMETIS_ADAPTIVE_REPART:
diff --git a/src/blockforest/loadbalancing/DynamicParMetis.h b/src/blockforest/loadbalancing/DynamicParMetis.h
index 266b572d4..d1b3acd1f 100644
--- a/src/blockforest/loadbalancing/DynamicParMetis.h
+++ b/src/blockforest/loadbalancing/DynamicParMetis.h
@@ -42,7 +42,7 @@ std::map< blockforest::BlockID, uint_t > getBlockIdToSequenceMapping( const Phan
 class DynamicParMetis
 {
 public:
-   enum Algorithm    { PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY, PARMETIS_ADAPTIVE_REPART, PARMETIS_REFINE_KWAY };
+   enum Algorithm    { PARMETIS_PART_GEOM, PARMETIS_PART_GEOM_KWAY, PARMETIS_PART_KWAY, PARMETIS_ADAPTIVE_REPART, PARMETIS_REFINE_KWAY };
    enum WeightsToUse { PARMETIS_NO_WEIGHTS = 0, PARMETIS_EDGE_WEIGHTS = 1, PARMETIS_VERTEX_WEIGHTS = 2, PARMETIS_BOTH_WEIGHTS = 3 };
    enum EdgeSource   { PARMETIS_EDGES_FROM_FOREST, PARMETIS_EDGES_FROM_EDGE_WEIGHTS };
 
diff --git a/src/core/mpi/MPIHelper.cpp b/src/core/mpi/MPIHelper.cpp
index 0a667acd9..94f239a90 100644
--- a/src/core/mpi/MPIHelper.cpp
+++ b/src/core/mpi/MPIHelper.cpp
@@ -34,6 +34,11 @@ namespace mpi {
 //!
 int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int srcRank)
 {
+   if (srcComm == destComm)
+   {
+      return srcRank;
+   }
+
    int destRank = -1;
    MPI_Group srcGroup, destGroup;
    MPI_Comm_group(srcComm, &srcGroup);
@@ -41,11 +46,9 @@ int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int src
    MPI_Group_translate_ranks(srcGroup, 1, const_cast<int*>(&srcRank), destGroup, &destRank);
    int size;
    MPI_Comm_size(destComm, &size);
-   if (destRank < 0 || destRank >= size)
-   {
-      WALBERLA_CHECK_EQUAL( destRank,  MPI_UNDEFINED );
-      destRank = -1;
-   }
+   if (destRank == MPI_UNDEFINED) destRank = -1;
+   WALBERLA_CHECK_GREATER_EQUAL(destRank, -1);
+   WALBERLA_CHECK_LESS(destRank, size);
    MPI_Group_free(&srcGroup);
    MPI_Group_free(&destGroup);
    return destRank;
@@ -60,6 +63,11 @@ int translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const int src
 //!
 std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm, const std::vector<int>& srcRank)
 {
+   if (srcComm == destComm)
+   {
+      return srcRank;
+   }
+
    std::vector<int> destRank(srcRank.size(), -1);
    MPI_Group srcGroup, destGroup;
    MPI_Comm_group(srcComm, &srcGroup);
@@ -69,11 +77,9 @@ std::vector<int> translateRank(const MPI_Comm srcComm, const MPI_Comm destComm,
    MPI_Comm_size(destComm, &size);
    for (auto& dstRnk : destRank)
    {
-      if (dstRnk < 0 || dstRnk >= size)
-      {
-         WALBERLA_CHECK_EQUAL( dstRnk,  MPI_UNDEFINED );
-         dstRnk = -1;
-      }
+      if (dstRnk == MPI_UNDEFINED) dstRnk = -1;
+      WALBERLA_CHECK_GREATER_EQUAL(dstRnk, -1);
+      WALBERLA_CHECK_LESS(dstRnk, size);
    }
    MPI_Group_free(&srcGroup);
    MPI_Group_free(&destGroup);
-- 
GitLab