From 2049c5bff82134f565ad2a5c28f62bef0c710fb6 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 7 Mar 2019 16:09:28 +0100
Subject: [PATCH] CUDA: fallbacks for older CUDA versions

---
 src/cuda/DeviceSelectMPI.cpp | 32 +++++++++++++-------------------
 src/cuda/ExecutionTreeGPU.h  | 13 +++++++++++++
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/src/cuda/DeviceSelectMPI.cpp b/src/cuda/DeviceSelectMPI.cpp
index d934abe79..f8eee582e 100644
--- a/src/cuda/DeviceSelectMPI.cpp
+++ b/src/cuda/DeviceSelectMPI.cpp
@@ -29,8 +29,6 @@ namespace cuda {
 
 #if MPI_VERSION == 2 || MPI_VERSION == 1
 
-#ifndef MPI_COMM_TYPE_SHARED
-
 void selectDeviceBasedOnMpiRank() {
    WALBERLA_ABORT("Your MPI implementation is too old - it does not support CUDA device selection based on MPI rank");
 }
@@ -39,43 +37,39 @@
 
 void selectDeviceBasedOnMpiRank()
 {
+#ifdef WALBERLA_BUILD_WITH_MPI
    int deviceCount;
-   WALBERLA_CUDA_CHECK( cudaGetDeviceCount ( &deviceCount ) );
-
+   WALBERLA_CUDA_CHECK( cudaGetDeviceCount( &deviceCount ));
+
    WALBERLA_LOG_INFO_ON_ROOT( "Selecting CUDA device depending on MPI Rank" );
 
    MPI_Info info;
    MPI_Info_create( &info );
 
    MPI_Comm newCommunicator;
-   MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &newCommunicator );
+   MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &newCommunicator );
 
    int processesOnNode;
    int rankOnNode;
    MPI_Comm_size( newCommunicator, &processesOnNode );
    MPI_Comm_rank( newCommunicator, &rankOnNode );
 
-   if( deviceCount == processesOnNode )
+   if ( deviceCount == processesOnNode )
    {
-      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ) );
+      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ));
    }
    else if ( deviceCount > processesOnNode )
    {
-      WALBERLA_LOG_WARNING("Not using all available GPUs on node. Processes on node "
-                           << processesOnNode << " available GPUs on node " << deviceCount );
-      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ) );
+      WALBERLA_LOG_WARNING( "Not using all available GPUs on node. Processes on node "
+                            << processesOnNode << " available GPUs on node " << deviceCount );
+      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ));
    }
    else
    {
-      WALBERLA_LOG_WARNING("Too many processes started per node - should be one per GPU. Number of processes per node "
-                           << processesOnNode << ", available GPUs on node " << deviceCount );
-      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode % deviceCount ) );
+      WALBERLA_LOG_WARNING( "Too many processes started per node - should be one per GPU. Number of processes per node "
+                            << processesOnNode << ", available GPUs on node " << deviceCount );
+      WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode % deviceCount ));
    }
-}
 #endif
-
-
-#else
-
-void selectDeviceBasedOnMpiRank() {}
+}
 #endif

diff --git a/src/cuda/ExecutionTreeGPU.h b/src/cuda/ExecutionTreeGPU.h
index 9f458f289..0b4e565d5 100644
--- a/src/cuda/ExecutionTreeGPU.h
+++ b/src/cuda/ExecutionTreeGPU.h
@@ -26,6 +26,19 @@
 
 #include <cuda_runtime.h>
 
+#ifdef CUDART_VERSION
+#if CUDART_VERSION <= 9020
+inline cudaError_t cudaLaunchHostFunc( cudaStream_t, void(CUDART_CB*)( void* userData ), void* ) {
+   static bool printedWarning = false;
+   if( ! printedWarning ) {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Timing of CUDA functions only implemented for CUDA versions >= 10.0" );
+      printedWarning = true;
+   }
+   return cudaSuccess;
+}
+#endif
+#endif
+
 namespace walberla {
 namespace executiontree {
 
--
GitLab
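
Editor's note: the sketch below shows how the device selection added in DeviceSelectMPI.cpp is typically wired into an application. It is a minimal illustration, not part of the patch; the header name cuda/DeviceSelectMPI.h and the use of walberla::Environment are assumptions about the surrounding waLBerla application code.

   #include "core/Environment.h"
   #include "cuda/DeviceSelectMPI.h"   // assumed header declaring selectDeviceBasedOnMpiRank()

   int main( int argc, char** argv )
   {
      // Initializes MPI (when built with WALBERLA_BUILD_WITH_MPI) and logging.
      walberla::Environment env( argc, argv );

      // Bind each rank on a shared-memory node to its own GPU. With this patch the
      // call aborts on MPI < 3.0 (which lacks MPI_Comm_split_type) and falls back
      // to round-robin assignment when ranks outnumber GPUs.
      walberla::cuda::selectDeviceBasedOnMpiRank();

      // ... build the block forest, allocate GPU fields, run kernels ...
      return 0;
   }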
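The ExecutionTreeGPU.h hunk backfills cudaLaunchHostFunc, which the CUDA runtime only provides from toolkit 10.0 on, with a warn-once no-op so the timing code still compiles against CUDA 9.2 and older. A self-contained sketch of the same pattern follows; the callback markDone and the warning text are invented for illustration.

   #include <cstdio>
   #include <cuda_runtime.h>

   // For toolkits without the real cudaLaunchHostFunc (CUDA < 10.0), provide a
   // warn-once stub with the same signature so callers compile unchanged.
   // Declaring it inline keeps the definition safe to place in a header.
   #if defined(CUDART_VERSION) && CUDART_VERSION <= 9020
   inline cudaError_t cudaLaunchHostFunc( cudaStream_t, void (CUDART_CB*)( void* ), void* )
   {
      static bool warned = false;
      if( !warned ) {
         std::fprintf( stderr, "cudaLaunchHostFunc unavailable before CUDA 10.0 - skipping host callback\n" );
         warned = true;
      }
      return cudaSuccess;
   }
   #endif

   // Hypothetical host callback: flags that all prior work in the stream finished.
   void CUDART_CB markDone( void* userData )
   {
      *static_cast< bool* >( userData ) = true;
   }

   int main()
   {
      bool done = false;
      cudaStream_t stream;
      cudaStreamCreate( &stream );
      // On CUDA >= 10.0 this enqueues markDone after the stream's pending work;
      // on older toolkits the stub above degrades it to a warn-once no-op.
      cudaLaunchHostFunc( stream, markDone, &done );
      cudaStreamSynchronize( stream );
      cudaStreamDestroy( stream );
      return 0;
   }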