Commit 2049c5bf authored by Martin Bauer

CUDA: fallbacks for older CUDA versions

parent 5c0e4d07
...@@ -29,8 +29,6 @@ namespace cuda { ...@@ -29,8 +29,6 @@ namespace cuda {
#if MPI_VERSION == 2 || MPI_VERSION == 1 #if MPI_VERSION == 2 || MPI_VERSION == 1
#ifndef MPI_COMM_TYPE_SHARED
void selectDeviceBasedOnMpiRank() { void selectDeviceBasedOnMpiRank() {
WALBERLA_ABORT("Your MPI implementation is tool old - it does not support CUDA device selection based on MPI rank"); WALBERLA_ABORT("Your MPI implementation is tool old - it does not support CUDA device selection based on MPI rank");
} }
In the MPI-3 branch, the WALBERLA_BUILD_WITH_MPI guard moves inside the function body, so builds without MPI now compile an empty no-op function instead of a separately guarded stub, and a start-up log line is added. The function splits MPI_COMM_WORLD into one communicator per shared-memory node and maps each rank on a node to a CUDA device:

@@ -39,43 +37,39 @@ void selectDeviceBasedOnMpiRank() {
 void selectDeviceBasedOnMpiRank()
 {
+#ifdef WALBERLA_BUILD_WITH_MPI
    int deviceCount;
    WALBERLA_CUDA_CHECK( cudaGetDeviceCount( &deviceCount ));
+   WALBERLA_LOG_INFO_ON_ROOT( "Selecting CUDA device depending on MPI Rank" );

    MPI_Info info;
    MPI_Info_create( &info );
    MPI_Comm newCommunicator;
    MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &newCommunicator );

    int processesOnNode;
    int rankOnNode;
    MPI_Comm_size( newCommunicator, &processesOnNode );
    MPI_Comm_rank( newCommunicator, &rankOnNode );

    if ( deviceCount == processesOnNode )
    {
       WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ));
    }
    else if ( deviceCount > processesOnNode )
    {
       WALBERLA_LOG_WARNING( "Not using all available GPUs on node. Processes on node "
                                << processesOnNode << ", available GPUs on node " << deviceCount );
       WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ));
    }
    else
    {
       WALBERLA_LOG_WARNING( "Too many processes started per node - should be one per GPU. Number of processes per node "
                                << processesOnNode << ", available GPUs on node " << deviceCount );
       WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode % deviceCount ));
    }
-}
 #endif
+}
-#else
-void selectDeviceBasedOnMpiRank() {}
 #endif
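For illustration, a minimal driver showing how this routine is meant to be used (one MPI rank per GPU) might look as follows. This is a hedged sketch, not part of the commit: it assumes waLBerla's usual Environment bootstrapping from core/Environment.h and the header cuda/DeviceSelectMPI.h. With, say, 4 GPUs and 8 ranks per node, pairs of ranks would share a device via rankOnNode % deviceCount.

#include "core/Environment.h"
#include "cuda/DeviceSelectMPI.h"

int main( int argc, char ** argv )
{
   // Environment initializes (and later finalizes) MPI when built with it.
   walberla::Environment env( argc, argv );

   // Pick a CUDA device from the rank's position on its shared-memory node.
   // Aborts on MPI 1/2; after this commit it silently does nothing in builds without MPI.
   walberla::cuda::selectDeviceBasedOnMpiRank();

   // ... set up block forest, fields, and GPU kernels here ...
   return 0;
}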
The second file gains a compile-time fallback: CUDA runtimes up to 9.2 do not provide cudaLaunchHostFunc, so a stub with the same signature is injected that warns once and otherwise does nothing:

@@ -26,6 +26,19 @@
 #include <cuda_runtime.h>

+#ifdef CUDART_VERSION
+#if CUDART_VERSION <= 9020
+cudaError_t cudaLaunchHostFunc( cudaStream_t, void (CUDART_CB *)( void * userData ), void * )
+{
+   static bool printedWarning = false;
+   if( !printedWarning )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Timing of CUDA functions only implemented for CUDA versions >= 10.0" );
+      printedWarning = true;
+   }
+   return cudaSuccess;
+}
+#endif
+#endif
+
 namespace walberla {
 namespace executiontree {
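To see why a no-op stub is enough to keep the execution-tree timing code compiling, here is a hedged, self-contained sketch of the call pattern; busyKernel and notifyDone are invented for the example. On CUDA 10 and newer, cudaLaunchHostFunc enqueues a host callback that runs once all earlier work in the stream has completed; with the fallback above, the call merely logs a warning once and returns cudaSuccess, so timing is skipped but nothing breaks.

#include <cstdio>
#include <cuda_runtime.h>

__global__ void busyKernel() {}  // stand-in for real work

// Host callback: runs on a CPU thread once all earlier work in the stream is done.
void CUDART_CB notifyDone( void * userData )
{
   *static_cast< bool * >( userData ) = true;
}

int main()
{
   cudaStream_t stream;
   cudaStreamCreate( &stream );

   bool done = false;
   busyKernel<<< 1, 1, 0, stream >>>();
   // On CUDA >= 10 this enqueues the callback; with the fallback it
   // degrades to a one-time warning and a no-op.
   cudaLaunchHostFunc( stream, notifyDone, &done );
   cudaStreamSynchronize( stream );

   std::printf( "callback ran: %s\n", done ? "yes" : "no" );
   cudaStreamDestroy( stream );
   return 0;
}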