Skip to content
Snippets Groups Projects
Commit a5f840b0 authored by Martin Bauer's avatar Martin Bauer
Browse files

Fixes in CUDA module

parent 04df6595
No related merge requests found
#include <iostream>
#include "cuda/FieldIndexing.h"
#include "cuda/FieldAccessor.h"
namespace walberla {
......
......@@ -212,6 +212,7 @@ inline int MPI_Isend( void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request*
inline int MPI_Recv( void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR }
inline int MPI_Send( void*, int, MPI_Datatype, int, int, MPI_Comm ) { WALBERLA_MPI_FUNCTION_ERROR }
inline int MPI_Sendrecv( void*, int, MPI_Datatype, int, int, void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *) { WALBERLA_MPI_FUNCTION_ERROR }
inline int MPI_Probe ( int, int, MPI_Comm, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR }
inline int MPI_Iprobe ( int, int, MPI_Comm, int*, MPI_Status* ) { WALBERLA_MPI_FUNCTION_ERROR }
......
......@@ -24,7 +24,6 @@
#include "core/cell/CellInterval.h"
#include "core/debug/Debug.h"
#include "core/logging/Logging.h"
#include "field/Layout.h"
#include <cuda_runtime.h>
......@@ -112,9 +111,9 @@ FieldIndexing<T> FieldIndexing<T>::interval ( const GPUField<T> & f, const CellI
// Jump over ghost cells to first inner cell
cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() );
data += ( ci.xMin() + gl )* xOffset +
( ci.yMin() + gl )* yOffset +
( ci.zMin() + gl )* zOffset;
data += ( ci.xMin() + gl )* cell_idx_c(xOffset) +
( ci.yMin() + gl )* cell_idx_c(yOffset) +
( ci.zMin() + gl )* cell_idx_c(zOffset);
dim3 gridDim;
......
......@@ -87,9 +87,9 @@ FieldIndexing3D<T> FieldIndexing3D<T>::interval( const GPUField<T> & f, const Ce
// position data according to ci
cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() );
data += ( ci.xMin() + gl ) * xOffset +
( ci.yMin() + gl ) * yOffset +
( ci.zMin() + gl ) * zOffset;
data += ( ci.xMin() + gl ) * cell_idx_c(xOffset) +
( ci.yMin() + gl ) * cell_idx_c(yOffset) +
( ci.zMin() + gl ) * cell_idx_c(zOffset);
dim3 idxDim( (unsigned int)ci.xSize(), (unsigned int)ci.ySize(), (unsigned int)ci.zSize() );
......
......@@ -91,7 +91,9 @@ void GPUPackInfo<GPUField_T>::unpackData(IBlock * receiver, stencil::Direction d
copyHostToDevFZYX( f->pitchedPtr(), buf,
sizeof(T), f->zAllocSize(), ci.zSize(),
ci.xMin() + nrOfGhostLayers, ci.yMin() + nrOfGhostLayers, ci.zMin() + nrOfGhostLayers, 0,
uint_c(ci.xMin() + nrOfGhostLayers),
uint_c(ci.yMin() + nrOfGhostLayers),
uint_c(ci.zMin() + nrOfGhostLayers), 0,
0, 0, 0, 0,
ci.xSize(), ci.ySize(), ci.zSize(), f->fSize() );
}
......@@ -120,8 +122,8 @@ void GPUPackInfo<GPUField_T>::communicateLocal(const IBlock * sender, IBlock * r
copyDevToDevFZYX( rf->pitchedPtr(), sf->pitchedPtr(),
sizeof(T), rf->zAllocSize(), sf->zAllocSize(),
rCi.xMin() + nrOfGhostLayers, rCi.yMin() + nrOfGhostLayers, rCi.zMin() + nrOfGhostLayers, 0,
sCi.xMin() + nrOfGhostLayers, sCi.yMin() + nrOfGhostLayers, sCi.zMin() + nrOfGhostLayers, 0,
uint_c(rCi.xMin() + nrOfGhostLayers), uint_c(rCi.yMin() + nrOfGhostLayers), uint_c(rCi.zMin() + nrOfGhostLayers), 0,
uint_c(sCi.xMin() + nrOfGhostLayers), uint_c(sCi.yMin() + nrOfGhostLayers), uint_c(sCi.zMin() + nrOfGhostLayers), 0,
rCi.xSize(), rCi.ySize(), rCi.zSize(), sf->fSize() );
}
......@@ -148,7 +150,9 @@ void GPUPackInfo<GPUField_T>::packDataImpl(const IBlock * sender, stencil::Direc
copyDevToHostFZYX( buf, f->pitchedPtr(),
sizeof(T), ci.zSize(), f->zAllocSize(),
0, 0, 0, 0,
ci.xMin() + nrOfGhostLayers, ci.yMin() + nrOfGhostLayers, ci.zMin() + nrOfGhostLayers, 0,
uint_c(ci.xMin() + nrOfGhostLayers),
uint_c(ci.yMin() + nrOfGhostLayers),
uint_c(ci.zMin() + nrOfGhostLayers), 0,
ci.xSize(), ci.ySize(), ci.zSize(), f->fSize() );
}
......
......@@ -29,10 +29,10 @@
#define LAYOUT field::fzyx
#define GL_SIZE 1
#define YOFFSET ( X_SIZE )
#define ZOFFSET ( ( Y_SIZE ) * ( YOFFSET ) )
#define FOFFSET ( ( Z_SIZE ) * ( ZOFFSET ) )
#define IDX4D( x, y, z, f ) ( (int)( (f) * (FOFFSET) + (z) * (Z_SIZE) + (y) * (YOFFSET) + (x) ) )
#define YOFFSET ( int( X_SIZE ) )
#define ZOFFSET ( int( Y_SIZE ) * int( YOFFSET ) )
#define FOFFSET ( int( Z_SIZE ) * int( ZOFFSET ) )
#define IDX4D( x, y, z, f ) ( (int)( int(f) * int(FOFFSET) + int(z) * int(Z_SIZE) + int(y) * int(YOFFSET) + int(x) ) )
namespace walberla {
......
#include <iostream>
#include "cuda/FieldAccessor.h"
#include "cuda/FieldIndexing.h"
namespace walberla {
......@@ -18,16 +14,5 @@ __global__ void kernel_double( cuda::FieldAccessor<double> f )
f.get() *= 2.0;
}
void kernel_double_field( const cuda::GPUField<double> & field )
{
using namespace std;
cuda::FieldIndexing<double> iter = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true );
std::cout << "Kernel call dims "
<< iter.blockDim().x << ","
<< iter.gridDim().x << ","
<< iter.gridDim().y << ","
<< iter.gridDim().z << endl;
kernel_double<<< iter.gridDim(), iter.blockDim() >>> ( iter.gpuAccess() );
}
} // namespace walberla
......@@ -35,13 +35,10 @@
using namespace walberla;
namespace walberla{
void kernel_double_field( const cuda::GPUField<double> & field );
void kernel_double( cuda::FieldAccessor<double> f );
}
GhostLayerField<real_t,1> * createCPUField( IBlock* const block, StructuredBlockStorage* const storage )
{
return new GhostLayerField<real_t,1> (
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment