From a5f840b03ad6e1da9e684b773ada6b2d027eb5f8 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Wed, 2 Aug 2017 09:25:04 +0200 Subject: [PATCH] Fixes in CUDA module --- apps/tutorials/cuda/01_GameOfLife_kernels.h | 2 +- src/core/mpi/MPIWrapper.h | 1 + src/cuda/FieldIndexing.impl.h | 7 +++---- src/cuda/FieldIndexing3D.impl.h | 6 +++--- src/cuda/communication/GPUPackInfo.h | 12 ++++++++---- tests/cuda/FieldIndexing3DTest.h | 8 ++++---- tests/cuda/Kernels.cu | 15 --------------- tests/cuda/SimpleKernelTest.cpp | 3 --- 8 files changed, 20 insertions(+), 34 deletions(-) diff --git a/apps/tutorials/cuda/01_GameOfLife_kernels.h b/apps/tutorials/cuda/01_GameOfLife_kernels.h index e0e30c85a..8ade238ab 100644 --- a/apps/tutorials/cuda/01_GameOfLife_kernels.h +++ b/apps/tutorials/cuda/01_GameOfLife_kernels.h @@ -1,6 +1,6 @@ #include <iostream> -#include "cuda/FieldIndexing.h" +#include "cuda/FieldAccessor.h" namespace walberla { diff --git a/src/core/mpi/MPIWrapper.h b/src/core/mpi/MPIWrapper.h index 23c0a4e2f..5da7a429d 100755 --- a/src/core/mpi/MPIWrapper.h +++ b/src/core/mpi/MPIWrapper.h @@ -212,6 +212,7 @@ inline int MPI_Isend( void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request* inline int MPI_Recv( void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Send( void*, int, MPI_Datatype, int, int, MPI_Comm ) { WALBERLA_MPI_FUNCTION_ERROR } +inline int MPI_Sendrecv( void*, int, MPI_Datatype, int, int, void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Probe ( int, int, MPI_Comm, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR } inline int MPI_Iprobe ( int, int, MPI_Comm, int*, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR } diff --git a/src/cuda/FieldIndexing.impl.h b/src/cuda/FieldIndexing.impl.h index c4837d3c1..3c579cd4b 100644 --- a/src/cuda/FieldIndexing.impl.h +++ b/src/cuda/FieldIndexing.impl.h @@ -24,7 +24,6 @@ #include "core/cell/CellInterval.h" #include "core/debug/Debug.h" -#include "core/logging/Logging.h" #include "field/Layout.h" #include <cuda_runtime.h> @@ -112,9 +111,9 @@ FieldIndexing<T> FieldIndexing<T>::interval ( const GPUField<T> & f, const CellI // Jump over ghost cells to first inner cell cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); - data += ( ci.xMin() + gl )* xOffset + - ( ci.yMin() + gl )* yOffset + - ( ci.zMin() + gl )* zOffset; + data += ( ci.xMin() + gl )* cell_idx_c(xOffset) + + ( ci.yMin() + gl )* cell_idx_c(yOffset) + + ( ci.zMin() + gl )* cell_idx_c(zOffset); dim3 gridDim; diff --git a/src/cuda/FieldIndexing3D.impl.h b/src/cuda/FieldIndexing3D.impl.h index 896f7e1d2..147396ac4 100644 --- a/src/cuda/FieldIndexing3D.impl.h +++ b/src/cuda/FieldIndexing3D.impl.h @@ -87,9 +87,9 @@ FieldIndexing3D<T> FieldIndexing3D<T>::interval( const GPUField<T> & f, const Ce // position data according to ci cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); - data += ( ci.xMin() + gl ) * xOffset + - ( ci.yMin() + gl ) * yOffset + - ( ci.zMin() + gl ) * zOffset; + data += ( ci.xMin() + gl ) * cell_idx_c(xOffset) + + ( ci.yMin() + gl ) * cell_idx_c(yOffset) + + ( ci.zMin() + gl ) * cell_idx_c(zOffset); dim3 idxDim( (unsigned int)ci.xSize(), (unsigned int)ci.ySize(), (unsigned int)ci.zSize() ); diff --git a/src/cuda/communication/GPUPackInfo.h b/src/cuda/communication/GPUPackInfo.h index 6a3e98434..b3124dd39 100644 --- a/src/cuda/communication/GPUPackInfo.h +++ b/src/cuda/communication/GPUPackInfo.h @@ -91,7 +91,9 @@ void GPUPackInfo<GPUField_T>::unpackData(IBlock * receiver, stencil::Direction d copyHostToDevFZYX( f->pitchedPtr(), buf, sizeof(T), f->zAllocSize(), ci.zSize(), - ci.xMin() + nrOfGhostLayers, ci.yMin() + nrOfGhostLayers, ci.zMin() + nrOfGhostLayers, 0, + uint_c(ci.xMin() + nrOfGhostLayers), + uint_c(ci.yMin() + nrOfGhostLayers), + uint_c(ci.zMin() + nrOfGhostLayers), 0, 0, 0, 0, 0, ci.xSize(), ci.ySize(), ci.zSize(), f->fSize() ); } @@ -120,8 +122,8 @@ void GPUPackInfo<GPUField_T>::communicateLocal(const IBlock * sender, IBlock * r copyDevToDevFZYX( rf->pitchedPtr(), sf->pitchedPtr(), sizeof(T), rf->zAllocSize(), sf->zAllocSize(), - rCi.xMin() + nrOfGhostLayers, rCi.yMin() + nrOfGhostLayers, rCi.zMin() + nrOfGhostLayers, 0, - sCi.xMin() + nrOfGhostLayers, sCi.yMin() + nrOfGhostLayers, sCi.zMin() + nrOfGhostLayers, 0, + uint_c(rCi.xMin() + nrOfGhostLayers), uint_c(rCi.yMin() + nrOfGhostLayers), uint_c(rCi.zMin() + nrOfGhostLayers), 0, + uint_c(sCi.xMin() + nrOfGhostLayers), uint_c(sCi.yMin() + nrOfGhostLayers), uint_c(sCi.zMin() + nrOfGhostLayers), 0, rCi.xSize(), rCi.ySize(), rCi.zSize(), sf->fSize() ); } @@ -148,7 +150,9 @@ void GPUPackInfo<GPUField_T>::packDataImpl(const IBlock * sender, stencil::Direc copyDevToHostFZYX( buf, f->pitchedPtr(), sizeof(T), ci.zSize(), f->zAllocSize(), 0, 0, 0, 0, - ci.xMin() + nrOfGhostLayers, ci.yMin() + nrOfGhostLayers, ci.zMin() + nrOfGhostLayers, 0, + uint_c(ci.xMin() + nrOfGhostLayers), + uint_c(ci.yMin() + nrOfGhostLayers), + uint_c(ci.zMin() + nrOfGhostLayers), 0, ci.xSize(), ci.ySize(), ci.zSize(), f->fSize() ); } diff --git a/tests/cuda/FieldIndexing3DTest.h b/tests/cuda/FieldIndexing3DTest.h index ab8eb00ce..80e1b6cfe 100644 --- a/tests/cuda/FieldIndexing3DTest.h +++ b/tests/cuda/FieldIndexing3DTest.h @@ -29,10 +29,10 @@ #define LAYOUT field::fzyx #define GL_SIZE 1 -#define YOFFSET ( X_SIZE ) -#define ZOFFSET ( ( Y_SIZE ) * ( YOFFSET ) ) -#define FOFFSET ( ( Z_SIZE ) * ( ZOFFSET ) ) -#define IDX4D( x, y, z, f ) ( (int)( (f) * (FOFFSET) + (z) * (Z_SIZE) + (y) * (YOFFSET) + (x) ) ) +#define YOFFSET ( int( X_SIZE ) ) +#define ZOFFSET ( int( Y_SIZE ) * int( YOFFSET ) ) +#define FOFFSET ( int( Z_SIZE ) * int( ZOFFSET ) ) +#define IDX4D( x, y, z, f ) ( (int)( int(f) * int(FOFFSET) + int(z) * int(Z_SIZE) + int(y) * int(YOFFSET) + int(x) ) ) namespace walberla { diff --git a/tests/cuda/Kernels.cu b/tests/cuda/Kernels.cu index fb4228113..e0d4c2f97 100644 --- a/tests/cuda/Kernels.cu +++ b/tests/cuda/Kernels.cu @@ -1,8 +1,4 @@ - -#include <iostream> - #include "cuda/FieldAccessor.h" -#include "cuda/FieldIndexing.h" namespace walberla { @@ -18,16 +14,5 @@ __global__ void kernel_double( cuda::FieldAccessor<double> f ) f.get() *= 2.0; } -void kernel_double_field( const cuda::GPUField<double> & field ) -{ - using namespace std; - cuda::FieldIndexing<double> iter = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true ); - std::cout << "Kernel call dims " - << iter.blockDim().x << "," - << iter.gridDim().x << "," - << iter.gridDim().y << "," - << iter.gridDim().z << endl; - kernel_double<<< iter.gridDim(), iter.blockDim() >>> ( iter.gpuAccess() ); -} } // namespace walberla diff --git a/tests/cuda/SimpleKernelTest.cpp b/tests/cuda/SimpleKernelTest.cpp index 4a19ec001..4c2efa9ef 100644 --- a/tests/cuda/SimpleKernelTest.cpp +++ b/tests/cuda/SimpleKernelTest.cpp @@ -35,13 +35,10 @@ using namespace walberla; namespace walberla{ -void kernel_double_field( const cuda::GPUField<double> & field ); void kernel_double( cuda::FieldAccessor<double> f ); } - - GhostLayerField<real_t,1> * createCPUField( IBlock* const block, StructuredBlockStorage* const storage ) { return new GhostLayerField<real_t,1> ( -- GitLab