diff --git a/CMakeLists.txt b/CMakeLists.txt
index c50270da0f7a63452e26c1e7c117a9e08238ade0..c4f09268258745d87e72ecc4fc2cd370904f7694 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1108,6 +1108,12 @@ if ( WALBERLA_BUILD_WITH_CUDA )
 
     list( APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
 
+    find_library( NVTX_LIBRARY nvToolsExt PATHS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64 )
+    if( NVTX_LIBRARY )
+        set( WALBERLA_CUDA_NVTX_AVAILABLE 1)
+        list ( APPEND SERVICE_LIBS ${NVTX_LIBRARY} )
+    endif()
+
     if ( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=" AND NOT WALBERLA_CXX_COMPILER_IS_MSVC )
         list ( APPEND CUDA_NVCC_FLAGS "-std=c++14" )
     endif ()
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index e8c9299c47e4e949fbb06983cceef7e6a4109361..db5e25972442ba7fb2384d71b81dc42fe661aa8f 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -18,6 +18,7 @@
 #include "cuda/HostFieldAllocator.h"
 #include "cuda/communication/GPUPackInfo.h"
 #include "cuda/ParallelStreams.h"
+#include "cuda/NVTX.h"
 #include "core/timing/TimingPool.h"
 #include "core/timing/RemainingTimeLogger.h"
 #include "cuda/AddGPUFieldToStorage.h"
@@ -115,6 +116,7 @@ int main( int argc, char **argv )
 
         innerOuterSection.run([&]( auto innerStream )
         {
+            cuda::nameStream(innerStream, "inner stream");
             for( auto &block: *blocks )
             {
                 if(!disableBoundaries)
@@ -129,6 +131,7 @@ int main( int argc, char **argv )
 
         innerOuterSection.run([&]( auto outerStream )
         {
+            cuda::nameStream(outerStream, "outer stream");
             gpuComm( outerStream );
 
             for( auto &block: *blocks )
diff --git a/src/cuda/NVTX.h b/src/cuda/NVTX.h
new file mode 100644
index 0000000000000000000000000000000000000000..3943581afcb7d56076bfac683be6e7aa049f7038
--- /dev/null
+++ b/src/cuda/NVTX.h
@@ -0,0 +1,110 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NVTX.h
+//! \ingroup cuda
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include <cstring>
+#include <string>
+
+#ifdef WALBERLA_CUDA_NVTX_AVAILABLE
+#include <nvToolsExt.h>
+#include <nvToolsExtCuda.h>
+#include <nvToolsExtCudaRt.h>
+
+namespace walberla{
+namespace cuda {
+
+namespace internal {
+
+//! Fills an NVTX event attribute struct with an ASCII message and an ARGB color.
+//! The returned struct points into \p name, so it must only be used while \p name is alive.
+inline nvtxEventAttributes_t makeNvtxEventAttributes(const std::string & name, const uint32_t color)
+{
+   nvtxEventAttributes_t eventAttrib;
+   std::memset(&eventAttrib, 0, NVTX_EVENT_ATTRIB_STRUCT_SIZE);
+   eventAttrib.version = NVTX_VERSION;
+   eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+   eventAttrib.colorType = NVTX_COLOR_ARGB;
+   eventAttrib.color = 0xFF000000 | color; // force the alpha byte to 0xFF
+   eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+   eventAttrib.message.ascii = name.c_str();
+   return eventAttrib;
+}
+
+} // namespace internal
+
+//! Emits a single NVTX marker carrying the message \p name and the color \p color.
+inline void nvtxMarker(const std::string& name, const uint32_t color=0xaaaaaa)
+{
+   const nvtxEventAttributes_t eventAttrib = internal::makeNvtxEventAttributes(name, color);
+   nvtxMarkEx(&eventAttrib);
+}
+
+//! Assigns the label \p name to the CUDA stream \p stream via NVTX.
+inline void nameStream(const cudaStream_t & stream, const std::string & name)
+{
+   nvtxNameCudaStreamA(stream, name.c_str());
+}
+
+//! Scoped NVTX range: the range is pushed in the constructor and popped in the destructor.
+class NvtxRange
+{
+public:
+   NvtxRange(const std::string & name, const uint32_t color=0xaaaaaa)
+   {
+      const nvtxEventAttributes_t eventAttrib = internal::makeNvtxEventAttributes(name, color);
+      nvtxRangePushEx(&eventAttrib);
+   }
+   ~NvtxRange()
+   {
+      nvtxRangePop();
+   }
+
+   // Not copyable: the destructor of a copy would pop the range a second time.
+   NvtxRange(const NvtxRange &) = delete;
+   NvtxRange & operator=(const NvtxRange &) = delete;
+};
+
+} // namespace cuda
+} // namespace walberla
+
+#else
+
+namespace walberla{
+namespace cuda {
+
+// No-op replacements used when WALBERLA_CUDA_NVTX_AVAILABLE is not defined.
+inline void nameStream(const cudaStream_t & /*stream*/, const std::string & /*name*/) {}
+inline void nvtxMarker(const std::string& /*name*/, const uint32_t /*color*/ = 0xaaaaaa) {}
+class NvtxRange
+{
+public:
+   NvtxRange(const std::string & /*name*/, const uint32_t /*color*/ = 0xaaaaaa) {}
+   NvtxRange(const NvtxRange &) = delete;
+   NvtxRange & operator=(const NvtxRange &) = delete;
+};
+
+} // namespace cuda
+} // namespace walberla
+
+#endif
diff --git a/src/waLBerlaDefinitions.in.h b/src/waLBerlaDefinitions.in.h
index 82f38181f2388727934ded2e08f302e34369b7e1..0d3ed2a75744723af5b9bc1ecd5553252a18504f 100644
--- a/src/waLBerlaDefinitions.in.h
+++ b/src/waLBerlaDefinitions.in.h
@@ -32,6 +32,7 @@
 #cmakedefine WALBERLA_BUILD_WITH_OPENMESH
 
 #cmakedefine WALBERLA_BUILD_WITH_CUDA
+#cmakedefine WALBERLA_CUDA_NVTX_AVAILABLE
 
 #cmakedefine WALBERLA_BUILD_WITH_CODEGEN
 