From e17f9a5347b35837692af1c5485c94c20ef6d6c9 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 15 Nov 2018 10:03:45 +0100
Subject: [PATCH] Function for CUDA device selection based on MPI Rank

---
 .../UniformGridGPU/UniformGridGPU.cpp         |  2 +
 src/cuda/DeviceSelectMPI.cpp                  | 74 +++++++++++++++++++
 src/cuda/DeviceSelectMPI.h                    | 40 ++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 src/cuda/DeviceSelectMPI.cpp
 create mode 100644 src/cuda/DeviceSelectMPI.h

diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index 0b2ff6b34..085c316d0 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -19,6 +19,7 @@
 #include "core/timing/RemainingTimeLogger.h"
 #include "cuda/AddGPUFieldToStorage.h"
 #include "cuda/communication/UniformGPUScheme.h"
+#include "cuda/DeviceSelectMPI.h"
 #include "lbm/sweeps/CellwiseSweep.h"
 #include "domain_decomposition/SharedSweep.h"
 
@@ -46,6 +47,7 @@ using FlagField_T = FlagField<flag_t>;
 int main( int argc, char **argv )
 {
    mpi::Environment env( argc, argv );
+   cuda::selectDeviceBasedOnMpiRank();
 
    for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
    {
diff --git a/src/cuda/DeviceSelectMPI.cpp b/src/cuda/DeviceSelectMPI.cpp
new file mode 100644
index 000000000..4fdab2f92
--- /dev/null
+++ b/src/cuda/DeviceSelectMPI.cpp
@@ -0,0 +1,74 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DeviceSelectMPI.cpp
+//! \ingroup cuda
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+
+#include "DeviceSelectMPI.h"
+#include "core/mpi/MPIWrapper.h"
+#include "cuda/ErrorChecking.h"
+#include "core/logging/Logging.h"
+
+namespace walberla {
+namespace cuda {
+
+#ifdef WALBERLA_BUILD_WITH_MPI
+
+void selectDeviceBasedOnMpiRank()
+{
+   int deviceCount;
+   WALBERLA_CUDA_CHECK( cudaGetDeviceCount ( &deviceCount ) );
+
+
+   MPI_Info info;
+   MPI_Info_create( &info );
+   MPI_Comm newCommunicator;
+   MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &newCommunicator );
+
+   int processesOnNode;
+   int rankOnNode;
+   MPI_Comm_size( newCommunicator, &processesOnNode );
+   MPI_Comm_rank( newCommunicator, &rankOnNode );
+
+   if( deviceCount == processesOnNode )
+   {
+      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ) );
+   }
+   else if ( deviceCount > processesOnNode )
+   {
+      WALBERLA_LOG_WARNING("Not using all available GPUs on node. Processes on node "
+                                   << processesOnNode << " available GPUs on node " << deviceCount );
+      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ) );
+   }
+   else
+   {
+      WALBERLA_LOG_WARNING("Too many processes started per node - should be one per GPU. Number of processes per node "
+                                   << processesOnNode << ", available GPUs on node " << deviceCount );
+      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode % deviceCount ) );
+   }
+}
+
+#else
+
+void selectDeviceBasedOnMpiRank() {}
+
+#endif
+
+
+} // namespace cuda
+} // namespace walberla
\ No newline at end of file
diff --git a/src/cuda/DeviceSelectMPI.h b/src/cuda/DeviceSelectMPI.h
new file mode 100644
index 000000000..06d429672
--- /dev/null
+++ b/src/cuda/DeviceSelectMPI.h
@@ -0,0 +1,40 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DeviceSelectMPI.h
+//! \ingroup cuda
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+
+namespace walberla {
+namespace cuda {
+
+
+/**
+ * Selects active CUDA device based on MPI rank
+ *
+ * assumes that on each node there are as many MPI processes started as there are GPUs
+ * - if there are more GPUs than processes on a node, a warning is printed and not all GPUs are utilized
+ * - if there are more processes than GPUs, also a warning is printed and multiple processes may access the same GPU.
+ *   Processes are assigned to GPUs in a round-robin fashion
+ */
+void selectDeviceBasedOnMpiRank();
+
+} // namespace cuda
+} // namespace walberla
\ No newline at end of file
-- 
GitLab