From a5f840b03ad6e1da9e684b773ada6b2d027eb5f8 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Wed, 2 Aug 2017 09:25:04 +0200
Subject: [PATCH] Fixes in CUDA module

---
 apps/tutorials/cuda/01_GameOfLife_kernels.h |  2 +-
 src/core/mpi/MPIWrapper.h                   |  1 +
 src/cuda/FieldIndexing.impl.h               |  7 +++----
 src/cuda/FieldIndexing3D.impl.h             |  6 +++---
 src/cuda/communication/GPUPackInfo.h        | 12 ++++++++----
 tests/cuda/FieldIndexing3DTest.h            |  8 ++++----
 tests/cuda/Kernels.cu                       | 15 ---------------
 tests/cuda/SimpleKernelTest.cpp             |  3 ---
 8 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/apps/tutorials/cuda/01_GameOfLife_kernels.h b/apps/tutorials/cuda/01_GameOfLife_kernels.h
index e0e30c85a..8ade238ab 100644
--- a/apps/tutorials/cuda/01_GameOfLife_kernels.h
+++ b/apps/tutorials/cuda/01_GameOfLife_kernels.h
@@ -1,6 +1,6 @@
 #include <iostream>
 
-#include "cuda/FieldIndexing.h"
+#include "cuda/FieldAccessor.h"
 
 
 namespace walberla {
diff --git a/src/core/mpi/MPIWrapper.h b/src/core/mpi/MPIWrapper.h
index 23c0a4e2f..5da7a429d 100755
--- a/src/core/mpi/MPIWrapper.h
+++ b/src/core/mpi/MPIWrapper.h
@@ -212,6 +212,7 @@ inline int MPI_Isend( void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request*
 
 inline int MPI_Recv( void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR }
 inline int MPI_Send( void*, int, MPI_Datatype, int, int, MPI_Comm )              { WALBERLA_MPI_FUNCTION_ERROR }
+inline int MPI_Sendrecv( void*, int, MPI_Datatype, int, int, void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *) { WALBERLA_MPI_FUNCTION_ERROR }
 
 inline int MPI_Probe    ( int, int, MPI_Comm, MPI_Status* )       { WALBERLA_MPI_FUNCTION_ERROR }
 inline int MPI_Iprobe   ( int, int, MPI_Comm, int*, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR }
diff --git a/src/cuda/FieldIndexing.impl.h b/src/cuda/FieldIndexing.impl.h
index c4837d3c1..3c579cd4b 100644
--- a/src/cuda/FieldIndexing.impl.h
+++ b/src/cuda/FieldIndexing.impl.h
@@ -24,7 +24,6 @@
 
 #include "core/cell/CellInterval.h"
 #include "core/debug/Debug.h"
-#include "core/logging/Logging.h"
 #include "field/Layout.h"
 
 #include <cuda_runtime.h>
@@ -112,9 +111,9 @@ FieldIndexing<T> FieldIndexing<T>::interval ( const GPUField<T> & f, const CellI
 
    // Jump over ghost cells to first inner cell
    cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() );
-   data += ( ci.xMin() + gl )* xOffset +
-           ( ci.yMin() + gl )* yOffset +
-           ( ci.zMin() + gl )* zOffset;
+   data += ( ci.xMin() + gl )* cell_idx_c(xOffset) +
+           ( ci.yMin() + gl )* cell_idx_c(yOffset) +
+           ( ci.zMin() + gl )* cell_idx_c(zOffset);
 
 
    dim3 gridDim;
diff --git a/src/cuda/FieldIndexing3D.impl.h b/src/cuda/FieldIndexing3D.impl.h
index 896f7e1d2..147396ac4 100644
--- a/src/cuda/FieldIndexing3D.impl.h
+++ b/src/cuda/FieldIndexing3D.impl.h
@@ -87,9 +87,9 @@ FieldIndexing3D<T> FieldIndexing3D<T>::interval( const GPUField<T> & f, const Ce
 
    // position data according to ci
    cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() );
-   data += ( ci.xMin() + gl ) * xOffset +
-           ( ci.yMin() + gl ) * yOffset +
-           ( ci.zMin() + gl ) * zOffset;
+   data += ( ci.xMin() + gl ) * cell_idx_c(xOffset) +
+           ( ci.yMin() + gl ) * cell_idx_c(yOffset) +
+           ( ci.zMin() + gl ) * cell_idx_c(zOffset);
 
 
    dim3 idxDim( (unsigned int)ci.xSize(), (unsigned int)ci.ySize(), (unsigned int)ci.zSize() );
diff --git a/src/cuda/communication/GPUPackInfo.h b/src/cuda/communication/GPUPackInfo.h
index 6a3e98434..b3124dd39 100644
--- a/src/cuda/communication/GPUPackInfo.h
+++ b/src/cuda/communication/GPUPackInfo.h
@@ -91,7 +91,9 @@ void GPUPackInfo<GPUField_T>::unpackData(IBlock * receiver, stencil::Direction d
 
    copyHostToDevFZYX( f->pitchedPtr(), buf,
                       sizeof(T), f->zAllocSize(), ci.zSize(),
-                      ci.xMin() + nrOfGhostLayers, ci.yMin() + nrOfGhostLayers, ci.zMin() + nrOfGhostLayers, 0,
+                      uint_c(ci.xMin() + nrOfGhostLayers),
+                      uint_c(ci.yMin() + nrOfGhostLayers),
+                      uint_c(ci.zMin() + nrOfGhostLayers), 0,
                       0, 0, 0, 0,
                       ci.xSize(), ci.ySize(), ci.zSize(), f->fSize() );
 }
@@ -120,8 +122,8 @@ void GPUPackInfo<GPUField_T>::communicateLocal(const IBlock * sender, IBlock * r
 
    copyDevToDevFZYX( rf->pitchedPtr(), sf->pitchedPtr(),
                      sizeof(T), rf->zAllocSize(), sf->zAllocSize(),
-                     rCi.xMin() + nrOfGhostLayers, rCi.yMin() + nrOfGhostLayers, rCi.zMin() + nrOfGhostLayers, 0,
-                     sCi.xMin() + nrOfGhostLayers, sCi.yMin() + nrOfGhostLayers, sCi.zMin() + nrOfGhostLayers, 0,
+                     uint_c(rCi.xMin() + nrOfGhostLayers), uint_c(rCi.yMin() + nrOfGhostLayers), uint_c(rCi.zMin() + nrOfGhostLayers), 0,
+                     uint_c(sCi.xMin() + nrOfGhostLayers), uint_c(sCi.yMin() + nrOfGhostLayers), uint_c(sCi.zMin() + nrOfGhostLayers), 0,
                      rCi.xSize(), rCi.ySize(), rCi.zSize(), sf->fSize() );
 }
 
@@ -148,7 +150,9 @@ void GPUPackInfo<GPUField_T>::packDataImpl(const IBlock * sender, stencil::Direc
    copyDevToHostFZYX( buf, f->pitchedPtr(),
                       sizeof(T), ci.zSize(), f->zAllocSize(),
                       0, 0, 0, 0,
-                      ci.xMin() + nrOfGhostLayers, ci.yMin() + nrOfGhostLayers, ci.zMin() + nrOfGhostLayers, 0,
+                      uint_c(ci.xMin() + nrOfGhostLayers),
+                      uint_c(ci.yMin() + nrOfGhostLayers),
+                      uint_c(ci.zMin() + nrOfGhostLayers), 0,
                       ci.xSize(), ci.ySize(), ci.zSize(), f->fSize() );
 }
 
diff --git a/tests/cuda/FieldIndexing3DTest.h b/tests/cuda/FieldIndexing3DTest.h
index ab8eb00ce..80e1b6cfe 100644
--- a/tests/cuda/FieldIndexing3DTest.h
+++ b/tests/cuda/FieldIndexing3DTest.h
@@ -29,10 +29,10 @@
 #define LAYOUT    field::fzyx
 #define GL_SIZE   1
 
-#define YOFFSET               ( X_SIZE )
-#define ZOFFSET               ( ( Y_SIZE ) * ( YOFFSET ) )
-#define FOFFSET               ( ( Z_SIZE ) * ( ZOFFSET ) )
-#define IDX4D( x, y, z, f )   ( (int)( (f) * (FOFFSET) + (z) * (Z_SIZE) + (y) * (YOFFSET) + (x) ) )
+#define YOFFSET               ( int( X_SIZE ) )
+#define ZOFFSET               ( int( Y_SIZE ) * int( YOFFSET ) )
+#define FOFFSET               ( int( Z_SIZE ) * int( ZOFFSET ) )
+#define IDX4D( x, y, z, f )   ( (int)( int(f) * int(FOFFSET) + int(z) * int(Z_SIZE) + int(y) * int(YOFFSET) + int(x) ) )
 
 
 namespace walberla {
diff --git a/tests/cuda/Kernels.cu b/tests/cuda/Kernels.cu
index fb4228113..e0d4c2f97 100644
--- a/tests/cuda/Kernels.cu
+++ b/tests/cuda/Kernels.cu
@@ -1,8 +1,4 @@
-
-#include <iostream>
-
 #include "cuda/FieldAccessor.h"
-#include "cuda/FieldIndexing.h"
 
 namespace walberla {
 
@@ -18,16 +14,5 @@ __global__ void kernel_double( cuda::FieldAccessor<double> f )
    f.get() *= 2.0;
 }
 
-void kernel_double_field( const cuda::GPUField<double> & field )
-{
-   using namespace std;
-   cuda::FieldIndexing<double> iter = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true );
-   std::cout << "Kernel call dims "
-             << iter.blockDim().x << ","
-             << iter.gridDim().x << ","
-             << iter.gridDim().y << ","
-             << iter.gridDim().z << endl;
-   kernel_double<<< iter.gridDim(), iter.blockDim() >>> ( iter.gpuAccess() );
-}
 
 } // namespace walberla
diff --git a/tests/cuda/SimpleKernelTest.cpp b/tests/cuda/SimpleKernelTest.cpp
index 4a19ec001..4c2efa9ef 100644
--- a/tests/cuda/SimpleKernelTest.cpp
+++ b/tests/cuda/SimpleKernelTest.cpp
@@ -35,13 +35,10 @@
 using namespace walberla;
 
 namespace walberla{
-void kernel_double_field( const cuda::GPUField<double> & field );
 
 void kernel_double( cuda::FieldAccessor<double> f );
 }
 
-
-
 GhostLayerField<real_t,1> * createCPUField( IBlock* const block, StructuredBlockStorage* const storage )
 {
    return new GhostLayerField<real_t,1> (
-- 
GitLab