Commit 6fc7b559 authored by Martin Bauer's avatar Martin Bauer

CUDA support

parent f59f8d50
......@@ -81,6 +81,8 @@ option ( WALBERLA_BUILD_WITH_PYTHON_LBM "Include LBM module into python modu
option ( WALBERLA_BUILD_WITH_LIKWID_MARKERS "Compile in markers for likwid-perfctr" )
option ( WALBERLA_BUILD_WITH_CUDA "Enable CUDA support" )
option ( WALBERLA_BUILD_WITH_FASTMATH "Fast math" )
......@@ -1013,6 +1015,45 @@ endif()
############################################################################################################################
##
## CUDA
##
############################################################################################################################
# Enables CUDA support: locates the CUDA toolkit, adds its include directories
# and libraries to the build, and makes sure nvcc compiles with C++11.
if ( WALBERLA_BUILD_WITH_CUDA )
# set ( BUILD_SHARED_LIBS ON )
set ( CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON )
# Do not forward host compiler flags to nvcc automatically (nvcc chokes on many of them)
set ( CUDA_PROPAGATE_HOST_FLAGS OFF CACHE BOOL "" )
# If the C compiler is a ccache wrapper, hand the real underlying compiler to
# nvcc as host compiler (nvcc cannot work through the ccache wrapper itself)
if ( (NOT DEFINED CUDA_HOST_COMPILER) AND (${CMAKE_C_COMPILER} MATCHES "ccache") )
string ( STRIP "${CMAKE_C_COMPILER_ARG1}" stripped_compiler_string )
find_program ( CUDA_HOST_COMPILER ${stripped_compiler_string} )
endif ()
find_package ( CUDA REQUIRED )
if ( CUDA_FOUND )
include_directories ( ${CUDA_INCLUDE_DIRS} )
list ( APPEND SERVICE_LIBS ${CUDA_LIBRARIES} )
# Add -std=c++11 only if the user has not already set a standard in CUDA_NVCC_FLAGS
if ( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=" )
list ( APPEND CUDA_NVCC_FLAGS "-std=c++11" )
endif ()
# Bug with gcc5 and cuda7.5:
#list( APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D__STRICT_ANSI__")
# NOTICE: existing cuda flags are overwritten
#set ( CUDA_NVCC_FLAGS "--compiler-bindir=/usr/bin/g++-4.3" )
#set ( CUDA_NVCC_FLAGS "-arch sm_20" )
else()
# Fallback: disable CUDA support if the toolkit was not found
# (NOTE(review): with find_package(... REQUIRED) above this branch should be unreachable)
set ( WALBERLA_BUILD_WITH_CUDA FALSE )
endif ( )
endif ( )
############################################################################################################################
############################################################################################################################
##
## Testing Coverage
......
add_subdirectory(basics)
add_subdirectory(cuda)
add_subdirectory(lbm)
add_subdirectory(pde)
add_subdirectory(pe)
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file 01_GameOfLife_cuda.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "01_GameOfLife_kernels.h"
#include "cuda/HostFieldAllocator.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "core/Environment.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/FieldCopy.h"
#include "cuda/GPUField.h"
#include "cuda/Kernel.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "field/AddToStorage.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include "geometry/structured/GrayScaleImage.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
typedef cuda::GPUField<double> GPUField;
/// Block data handle that creates a CPU scalar field matching one block's size.
///
/// The field is created with one ghost layer, zero initial value and fzyx layout.
/// Host memory is allocated with cuda::HostFieldAllocator, i.e. as pinned
/// (page-locked) memory, which speeds up later CPU<->GPU transfers.
ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
{
   const uint_t xCells = storage->getNumberOfXCells( *block );
   const uint_t yCells = storage->getNumberOfYCells( *block );
   const uint_t zCells = storage->getNumberOfZCells( *block );

   // one ghost layer, initial value 0, fzyx layout, pinned-memory allocator
   return new ScalarField( xCells, yCells, zCells,
                           1, real_t(0), field::fzyx,
                           make_shared< cuda::HostFieldAllocator<double> >() );
}
/// Sweep functor that launches one Game-of-Life timestep on the GPU.
///
/// Wraps the CUDA kernel so it can be registered at a SweepTimeloop like any
/// CPU sweep: fetch both GPU fields from the block, launch the kernel, then
/// swap source and destination so the next step reads the updated state.
class GameOfLifeSweepCUDA
{
public:
   GameOfLifeSweepCUDA( BlockDataID gpuFieldSrcID, BlockDataID gpuFieldDstID )
      : gpuFieldSrcID_( gpuFieldSrcID ), gpuFieldDstID_( gpuFieldDstID )
   {}

   void operator() ( IBlock * block )
   {
      auto src = block->getData< cuda::GPUField<real_t> >( gpuFieldSrcID_ );
      auto dst = block->getData< cuda::GPUField<real_t> >( gpuFieldDstID_ );

      // Configure the kernel wrapper with one indexing object per field, then launch
      auto kernel = cuda::make_kernel( &gameOfLifeKernel );
      kernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *src ) );
      kernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dst ) );
      kernel();

      // Result is now in dst - swap pointers so src holds the current state again
      src->swapDataPointers( dst );
   }

private:
   BlockDataID gpuFieldSrcID_;   // GPU source field (read by the kernel)
   BlockDataID gpuFieldDstID_;   // GPU destination field (written by the kernel)
};
// Sets up the Game-of-Life simulation: CPU field for setup/output, two GPU
// fields for the actual computation, direct MPI communication of GPU memory,
// and a timeloop whose sweeps run on the GPU.
int main( int argc, char ** argv )
{
walberla::Environment env( argc, argv );
// Initial state (Gosper glider gun) is loaded from a gray-scale image
geometry::GrayScaleImage image ("GosperGliderGun.png");
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(2), uint_t(1), // number of blocks in x,y,z direction
image.size( uint_t(0) ), image.size( uint_t(1) ) / uint_t(2), uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
false, false, false ); // no periodicity
// CPU field is used only for initialization (from the image) and for GUI output
BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
// Initializing the field from an image
using geometry::initializer::ScalarFieldFromGrayScaleImage;
ScalarFieldFromGrayScaleImage fieldInitializer ( *blocks, cpuFieldID ) ;
fieldInitializer.init( image, uint_t(2), false );
// Two GPU fields (ping-pong source/destination), both created from the CPU field
BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" );
// Direct (MPI-datatype based) communication scheme for the GPU source field.
// NOTE(review): this communicates GPU memory directly - requires a CUDA-aware MPI library
typedef blockforest::communication::UniformDirectScheme<stencil::D2Q9 > CommScheme;
CommScheme communication( blocks );
communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(10); // number of timesteps for non-gui runs
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( communication, "Communication" )
<< Sweep( GameOfLifeSweepCUDA(gpuFieldSrcID, gpuFieldDstID ), "GameOfLifeSweep" );
// Copy the GPU result back to the CPU field every timestep so it can be displayed
timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) );
// Start the graphical user interface, which drives the timeloop
GUI gui ( timeloop, blocks, argc, argv );
gui.run();
return 0;
}
namespace walberla{
/**
\page tutorial_cuda01 Tutorial - CUDA 1: Game of Life on GPU
\image html tutorial_cuda01_nvidia_titan.png
> _Note:_ This tutorial requires a CUDA-aware MPI library.
> If you get a SEGFAULT when executing this tutorial, make sure that your MPI library was built with
> CUDA support! For instructions how to build OpenMPI with CUDA see this [page](https://www.open-mpi.org/faq/?category=building#build-cuda).
\section cuda01_fields Creating Fields
To run a simulation on a NVIDIA graphics card, we have to allocate data on the GPU and
write a CUDA kernel that operates on this data. In this tutorial we first allocate a field on the GPU
and learn about functionality to transfer data between CPU and GPU fields.
Since initialization and output routines are usually not time critical, they are implemented
for CPU fields only. In waLBerla we set up the complete simulation using
CPU fields, copy the initialized fields over to the GPU, do the complete computation there, and, in the
end, copy everything back to do the output from the CPU field.
So only the time critical kernels have to be written in CUDA.
Thus the setup code of the GPU GameOfLife program is very similar to its CPU version, which was implemented
in a previous tutorial ( \ref tutorial_basics_03 ).
One difference is that fields which are often transferred from/to the GPU should be allocated with
a different field allocator: cuda::HostFieldAllocator . This allocator uses cudaHostAlloc() instead of "new" ,
such that the memory is marked "pinned", which means that it is always held in RAM and cannot be swapped out to disk.
Data transfer from pinned memory is faster than from normal memory. The usage of this allocator is not
mandatory, the data transfer functions work (slightly slower) also with normally allocated fields.
\code
ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
{
return new ScalarField (
storage->getNumberOfXCells( *block ), // number of cells in x direction per block
storage->getNumberOfYCells( *block ), // number of cells in y direction per block
storage->getNumberOfZCells( *block ), // number of cells in z direction per block
1, // one ghost layer
real_t(0), // initial value
field::fzyx, // layout
make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory
);
}
\endcode
Now we initialize the CPU field just like in the previous tutorial \ref tutorial_basics_03 .
Then two GPU fields are created: "source" and "destination" field. The helper function
cuda::addGPUFieldToStorage() creates a cuda::GPUField field of the same size and layout of the given
CPU field:
\code
BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" );
\endcode
The contents of the new GPU fields are initialized with the contents of the given CPU field.
\section cuda01_kernels Writing and calling CUDA kernels
For a basic understanding of the CUDA support in waLBerla please read \ref cudaPage first.
After reading this page you should know what a FieldAccessor is and how to call CUDA kernels from
cpp files. So we can now start with writing
a CUDA kernel for the Game of Life algorithm. We place this in a separate file with ".cu" extension.
The build system then automatically detects that this file should be compiled with the CUDA C++ compiler.
The kernel gets two field accessors as arguments, one for the source and one for the destination field.
Both accessors have to be configured using the CUDA variables blockIdx and threadIdx, such that afterwards
the get() and getNeighbor() functions of the accessor class can work correctly.
\code
__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst )
{
src.set( blockIdx, threadIdx );
dst.set( blockIdx, threadIdx );
int liveNeighbors = 0;
if ( src.getNeighbor( 1, 0,0 ) > 0.5 ) ++liveNeighbors;
if ( src.getNeighbor( -1, 0,0 ) > 0.5 ) ++liveNeighbors;
// normal Game of Life algorithm ....
// ...
}
\endcode
To call this kernel we write a thin wrapper sweep which only has to get the GPU fields out of the blockstorage
and passes them to the CUDA kernel. We use the cuda::Kernel class from waLBerla here, so that we can write this
sweep in a normal cpp file.
Here are the contents of this sweep:
\code
auto srcCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldSrcID_ );
auto dstCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldDstID_ );
auto myKernel = cuda::make_kernel( &gameOfLifeKernel );
myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *srcCudaField ) );
myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dstCudaField ) );
myKernel();
srcCudaField->swapDataPointers( dstCudaField );
\endcode
All the computations are done on the GPU. The CPU field is not updated automatically! It was just used for
setup reasons.
To see if our kernel works, we copy the contents back to the CPU field after every timestep:
\code
timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) );
\endcode
Of course this makes no sense for real simulations, since the transfer time is much higher than the
time that was saved by doing the computation on the GPU. For production runs, one would usually transfer the
field back every n'th timestep and write e.g. a VTK frame.
\section cuda01_comm Communication
In waLBerla there are two types of communication: _buffered_ and _direct_ communication.
While buffered communication first collects all data in a buffer and sends only one message per communication step and neighbor,
the direct communication strategy, which is based on MPI datatypes, uses no intermediate buffers and therefore has to send
more messages than buffered communication. For details see \ref walberla_communication .
In the tutorials up to now, only the buffered approach was used. In this tutorial, we switch to the direct communication strategy
because then we can use the CUDA support of the MPI library to directly communicate from/to GPU memory.
The usage of the two different communication schemes is very similar. Instead of creating a blockforest::communication::UniformBufferedScheme
we create a blockforest::communication::UniformDirectScheme.
Then we register a field::communication::UniformMPIDatatypeInfo instead of the field::communication::PackInfo.
\code
typedef blockforest::communication::UniformDirectScheme<stencil::D2Q9 > CommScheme;
CommScheme communication( blocks );
communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) );
\endcode
This scheme also supports heterogeneous simulations, i.e. using a CPU field on
some processes and a GPU field on other processes.
*/
}
#include "../cuda/01_GameOfLife_kernels.h"
#include <iostream>
namespace walberla {
/// One Game-of-Life timestep for the cell assigned to this thread.
/// Reads the current state from src and writes the new state to dst.
__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst )
{
   // Point both accessors at this thread's cell
   src.set( blockIdx, threadIdx );
   dst.set( blockIdx, threadIdx );

   // Count living cells (value > 0.5) among the 8 surrounding neighbors
   const int dx[8] = {  1, -1,  0,  0, -1, -1,  1,  1 };
   const int dy[8] = {  0,  0,  1, -1, -1,  1, -1,  1 };
   int liveNeighbors = 0;
   for ( int i = 0; i < 8; ++i )
      if ( src.getNeighbor( dx[i], dy[i], 0 ) > 0.5 )
         ++liveNeighbors;

   if ( liveNeighbors < 2 || liveNeighbors > 3 )
      dst.get() = 0.0;           // cell dies of under- or over-population
   else if ( liveNeighbors == 3 )
      dst.get() = 1.0;           // cell comes alive (or stays alive)
   else                          // exactly two live neighbors: state is kept
      dst.get() = src.get();
}
} // namespace walberla
#include <iostream>
#include "cuda/FieldIndexing.h"
namespace walberla {
// One Game-of-Life timestep: reads the current state from src and writes the
// new state to dst. Declared here so the CPU-side sweep (compiled by the C++
// compiler) can reference the kernel defined in the .cu file.
__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst );
} // namespace walberla
# Make run-time resources (parameter files and the input image) available in the build directory
waLBerla_link_files_to_builddir( *.prm )
waLBerla_link_files_to_builddir( *.png )
# The .cu file is compiled by nvcc through the CUDA-aware waLBerla_add_executable
waLBerla_add_executable ( NAME 01_GameOfLife_cuda
FILES 01_GameOfLife_cuda.cpp 01_GameOfLife_kernels.cu
DEPENDS blockforest core cuda field lbm geometry timeloop gui )
\ No newline at end of file
......@@ -84,13 +84,17 @@ function ( waLBerla_add_module )
set( hasSourceFiles FALSE )
foreach ( sourceFile ${sourceFiles} )
if ( ${sourceFile} MATCHES "\\.(c|cpp)" )
if ( ${sourceFile} MATCHES "\\.(c|cpp|cu)" )
set( hasSourceFiles TRUE )
endif( )
endforeach( )
if ( hasSourceFiles )
add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} )
if ( hasSourceFiles )
if ( CUDA_FOUND )
cuda_add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} )
else()
add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} )
endif( CUDA_FOUND )
else( )
add_custom_target( ${moduleLibraryName} SOURCES ${sourceFiles} ${otherFiles} ) # dummy IDE target
endif( )
......@@ -194,7 +198,13 @@ function ( waLBerla_add_executable )
endif ( )
endif()
add_executable( ${ARG_NAME} ${sourceFiles} )
if ( CUDA_FOUND )
cuda_add_executable( ${ARG_NAME} ${sourceFiles} )
else()
add_executable( ${ARG_NAME} ${sourceFiles} )
endif()
#add_executable( ${ARG_NAME} ${sourceFiles} )
target_link_modules ( ${ARG_NAME} ${ARG_DEPENDS} )
target_link_libraries( ${ARG_NAME} ${SERVICE_LIBS} )
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AddGPUFieldToStorage.h
//! \ingroup cuda
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#pragma once
#include "GPUField.h"
#include "domain_decomposition/StructuredBlockStorage.h"
#include <boost/bind.hpp>
namespace walberla {
namespace cuda {
//*******************************************************************************************************************
/*! Adds a cuda::GPUField to a StructuredBlockStorage
*
* - Similar to walberla::field::addToStorage() functions
* - created field is uninitialized
*/
//*******************************************************************************************************************
template< typename GPUField_T>
BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs,
const std::string & identifier,    // name under which the field is registered at the block storage
uint_t fSize,                      // number of values stored per cell
const Layout layout = fzyx,        // memory layout of the created GPU field
uint_t nrOfGhostLayers = 1 );      // number of ghost layers
//*******************************************************************************************************************
/*! Adds a cuda::GPUField to a StructuredBlockStorage using data from a CPU field
*
* - adds a GPU field to a StructuredBlockStorage using a CPU field
* - sizes, number of ghostlayers and layout are the same as the CPU field
* - GPU field is initialized with the data currently stored in the CPU field
* @tparam Field_T type of the CPU field, the created GPUField will be of type cuda::GPUField<Field_T::value_type>
*/
//*******************************************************************************************************************
template< typename Field_T>
BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs,
ConstBlockDataID cpuFieldID,       // CPU field that provides sizes, layout and initial data
const std::string & identifier );  // name under which the GPU field is registered
} // namespace cuda
} // namespace walberla
#include "AddGPUFieldToStorage.impl.h"
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AddGPUFieldToStorage.impl.h
//! \ingroup cuda
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#pragma once
namespace walberla {
namespace cuda {
namespace internal
{
// Block data creation helper: allocates an (uninitialized) GPU field whose
// x/y/z extents match the given block.
template< typename GPUField_T>
GPUField_T * createGPUField( const IBlock * const block,
                             const StructuredBlockStorage * const bs,
                             uint_t ghostLayers,
                             uint_t fSize,
                             const field::Layout & layout )
{
   const uint_t xCells = bs->getNumberOfXCells( *block );
   const uint_t yCells = bs->getNumberOfYCells( *block );
   const uint_t zCells = bs->getNumberOfZCells( *block );
   return new GPUField_T( xCells, yCells, zCells, fSize, ghostLayers, layout );
}
// Block data creation helper: allocates a GPU field mirroring an existing CPU
// field (same sizes, f-size, ghost layers and layout) and copies its contents.
template< typename Field_T>
GPUField< typename Field_T::value_type> *
createGPUFieldFromCPUField( const IBlock * const block,
                            const StructuredBlockStorage * const,
                            ConstBlockDataID cpuFieldID
                          )
{
   typedef GPUField< typename Field_T::value_type> GPUField_T;

   const Field_T * cpuField = block->getData<Field_T>( cpuFieldID );

   GPUField_T * gpuField = new GPUField_T( cpuField->xSize(), cpuField->ySize(), cpuField->zSize(),
                                           cpuField->fSize(), cpuField->nrOfGhostLayers(),
                                           cpuField->layout() );
   // initialize the GPU field with the data currently stored in the CPU field
   cuda::fieldCpy( *gpuField, *cpuField );
   return gpuField;
}
}
/// Adds an uninitialized cuda::GPUField to the block storage.
///
/// \param bs              block storage the field is added to
/// \param identifier      name under which the field is registered
/// \param fSize           number of values stored per cell
/// \param layout          memory layout of the created GPU field
/// \param nrOfGhostLayers number of ghost layers
template< typename GPUField_T>
BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs,
                                 const std::string & identifier,
                                 uint_t fSize,
                                 const Layout layout,
                                 uint_t nrOfGhostLayers )
{
   // A lambda (file already builds with -std=c++11) instead of boost::bind:
   // clearer capture of the creation parameters and better compiler diagnostics.
   auto func = [nrOfGhostLayers, fSize, layout]( IBlock * const block, StructuredBlockStorage * const storage )
   {
      return internal::createGPUField<GPUField_T>( block, storage, nrOfGhostLayers, fSize, layout );
   };
   return bs->addStructuredBlockData< GPUField_T >( func, identifier );
}
/// Adds a cuda::GPUField to the block storage, sized and initialized from a CPU field.
///
/// \param bs         block storage the field is added to
/// \param cpuFieldID CPU field providing sizes, layout and initial data
/// \param identifier name under which the GPU field is registered
template< typename Field_T>
BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs,
                                  ConstBlockDataID cpuFieldID,
                                  const std::string & identifier )
{
   // Lambda instead of boost::bind (clearer, no placeholder machinery; file is C++11)
   auto func = [cpuFieldID]( IBlock * const block, StructuredBlockStorage * const storage )
   {
      return internal::createGPUFieldFromCPUField<Field_T>( block, storage, cpuFieldID );
   };
   return bs->addStructuredBlockData< GPUField<typename Field_T::value_type> >( func, identifier );
}
} // namespace cuda
} // namespace walberla
###################################################################################################
#
# Module cuda
#
###################################################################################################
waLBerla_add_module( DEPENDS core domain_decomposition field stencil BUILD_ONLY_IF_FOUND CUDA )
###################################################################################################
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file ErrorChecking.h
//! \ingroup cuda
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#pragma once
#include "core/Abort.h"
#include <sstream>
#include <cuda_runtime.h>
namespace walberla {
namespace cuda {
// Wraps a CUDA runtime call and aborts with file/line information on failure.
// The do { } while(0) idiom makes the macro expand to a single statement, so
// it is safe inside unbraced if/else branches - the original bare { } block
// would produce a stray ';' that breaks a following "else".
#define WALBERLA_CUDA_CHECK(ans) do { ::walberla::cuda::checkForError((ans), __FILE__, __LINE__); } while(0)

//*******************************************************************************************************************
/*! Aborts the program with a descriptive message if a CUDA runtime call failed
 *
 * \param code       return value of the CUDA runtime call
 * \param callerPath source file of the call site (pass __FILE__)
 * \param line       line number of the call site (pass __LINE__)
 */
//*******************************************************************************************************************
inline void checkForError( cudaError_t code, const std::string & callerPath, const int line )
{
   if( code != cudaSuccess )
   {
      std::stringstream ss;
      ss << "CUDA Error: " << cudaGetErrorString( code );
      Abort::instance()->abort( ss.str(), callerPath, line );
   }
}
} // namespace cuda
} // namespace walberla
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file SimpleFieldAccessor.h
//! \ingroup cuda
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#pragma once
#include <cuda_runtime.h>
#include "core/DataTypes.h"
namespace walberla {
namespace cuda {
template<typename T>
class FieldAccessor
{
public:
enum IndexingScheme { FZYX, FZY, FZ, F,
ZYXF, ZYX, ZY, Z
};
FieldAccessor( char * ptr,
uint32_t xOffset,
uint32_t yOffset,
uint32_t zOffset,
uint32_t fOffset,
IndexingScheme indexingScheme )
: ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset),
fOffset_(fOffset), indexingScheme_(indexingScheme )
{}
__device__ void set( uint3 blockIdx, uint3 threadIdx )
{
switch ( indexingScheme_)
{
case FZYX: ptr_