Commit ddc0a76e authored by Martin Bauer's avatar Martin Bauer

Code generation: new API

- information from CMake to codegen: double/float, OpenMP, ...
parent 83a55d9a
Pipeline #13259 canceled with stages
in 103 minutes and 15 seconds
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_python_file_generates(UniformGridGPU.py
UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
)
waLBerla_add_executable ( NAME UniformGridBenchmarkGPU
FILES UniformGridGPU.cpp UniformGridGPU_LatticeModel.cpp
UniformGridGPU_LbKernel.cu UniformGridGPU_NoSlip.cu UniformGridGPU_UBB.cu
UniformGridGPU_PackInfo.cu
FILES UniformGridGPU.cpp UniformGridGPU.py
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk )
import sympy as sp
from lbmpy_walberla import generate_lattice_model_files
from lbmpy.creationfunctions import create_lb_update_rule
from pystencils_walberla.sweep import Sweep
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method
from lbmpy_walberla.boundary import create_boundary_class
from pystencils_walberla.cmake_integration import codegen
dtype = 'float64'
# LB options
options = {
'method': 'srt',
'stencil': 'D3Q19',
'relaxation_rate': sp.Symbol("omega"),
'field_name': 'pdfs',
'compressible': False,
'temporary_field_name': 'pdfs_tmp',
'optimization': {'cse_global': True,
'cse_pdfs': True,
'double_precision': dtype == 'float64'}
}
# GPU optimization options
inner_opt = {'gpu_indexing_params': {'block_size': (128, 1, 1)}, 'data_type': dtype}
outer_opt = {'gpu_indexing_params': {'block_size': (32, 32, 32)}, 'data_type': dtype}
def lb_assignments():
ur = create_lb_update_rule(**options)
return ur.all_assignments
def genBoundary():
boundary = UBB([0.05, 0, 0], dim=3, name="UniformGridGPU_UBB")
return create_boundary_class(boundary, create_lb_method(**options), target='gpu')
def genNoSlip():
boundary = NoSlip(name='UniformGridGPU_NoSlip')
return create_boundary_class(boundary, create_lb_method(**options), target='gpu')
generate_lattice_model_files(class_name='UniformGridGPU_LatticeModel', **options)
Sweep.generate_inner_outer_kernel('UniformGridGPU_LbKernel',
lambda: create_lb_update_rule(**options).all_assignments,
target='gpu',
temporary_fields=['pdfs_tmp'],
field_swaps=[('pdfs', 'pdfs_tmp')],
optimization=inner_opt,
outer_optimization=outer_opt)
Sweep.generate_pack_info('UniformGridGPU_PackInfo', lb_assignments, target='gpu')
codegen.register(['UniformGridGPU_UBB.h', 'UniformGridGPU_UBB.cu'], genBoundary)
codegen.register(['UniformGridGPU_NoSlip.h', 'UniformGridGPU_NoSlip.cu'], genNoSlip)
import sympy as sp
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
from lbmpy.boundaries import NoSlip, UBB
from pystencils_walberla import generate_pack_info_from_kernel
from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep
with CodeGeneration() as ctx:
# LB options
options = {
'method': 'srt',
'stencil': 'D3Q19',
'relaxation_rate': sp.Symbol("omega"),
'field_name': 'pdfs',
'compressible': False,
'temporary_field_name': 'pdfs_tmp',
'optimization': {'cse_global': True,
'cse_pdfs': True,
'gpu_indexing_params': {'block_size': (128, 1, 1)}}
}
lb_method = create_lb_method(**options)
update_rule = create_lb_update_rule(lb_method=lb_method, **options)
# CPU lattice model - required for macroscopic value computation, VTK output etc.
generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', lb_method)
# gpu LB sweep & boundaries
generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')],
inner_outer_split=True, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')
# communication
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
Parameters
{
omega 1.8;
timesteps 2;
remainingTimeLoggerFrequency 3;
vtkWriteFrequency 0;
overlapCommunication false;
cudaEnabledMPI false;
}
DomainSetup
{
blocks < 1, 1, 1 >;
cellsPerBlock < 50, 20, 10 >;
periodic < 0, 0, 1 >;
}
Boundaries
{
Border { direction W; walldistance -1; flag NoSlip; }
Border { direction E; walldistance -1; flag NoSlip; }
Border { direction S; walldistance -1; flag NoSlip; }
Border { direction N; walldistance -1; flag UBB; }
}
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file UniformGridGPU_LbKernel.h
//! \\author pystencils
//======================================================================================================================
#include "core/DataTypes.h"
#include "cuda/GPUField.h"
#include "cuda/ParallelStreams.h"
#include "field/SwapableCompare.h"
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include <set>
#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
namespace walberla {
namespace pystencils {
class UniformGridGPU_LbKernel
{
public:
UniformGridGPU_LbKernel( BlockDataID pdfsID_, double omega_)
: pdfsID(pdfsID_), omega(omega_)
{};
~UniformGridGPU_LbKernel() {
for(auto p: cache_pdfs_) {
delete p;
}
}
void operator() ( IBlock * block , cudaStream_t stream = 0 );
void inner( IBlock * block , cudaStream_t stream = 0 );
void outer( IBlock * block , cudaStream_t stream = 0 );
void setOuterPriority(int priority ) {
parallelStreams_.setStreamPriority(priority);
}
private:
BlockDataID pdfsID;
double omega;
std::set< cuda::GPUField<double> *, field::SwapableCompare< cuda::GPUField<double> * > > cache_pdfs_;
cuda::ParallelStreams parallelStreams_;
};
} // namespace pystencils
} // namespace walberla
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic pop
#endif
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file UniformGridGPU_NoSlip.cpp
//! \\ingroup lbm
//! \\author lbmpy
//======================================================================================================================
#include <cmath>
#include "core/DataTypes.h"
#include "core/Macros.h"
#include "UniformGridGPU_NoSlip.h"
#include "cuda/ErrorChecking.h"
#define FUNC_PREFIX __global__
using namespace std;
namespace walberla {
namespace lbm {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#ifdef __CUDACC__
#pragma push
#pragma diag_suppress = declared_but_not_referenced
#endif
namespace internal_boundary_UniformGridGPU_NoSlip {
static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
{
if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
{
uint8_t * const _data_indexVector_10 = _data_indexVector;
const int32_t x = *((int32_t *)(& _data_indexVector_10[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
uint8_t * const _data_indexVector_14 = _data_indexVector + 4;
const int32_t y = *((int32_t *)(& _data_indexVector_14[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
uint8_t * const _data_indexVector_18 = _data_indexVector + 8;
const int32_t z = *((int32_t *)(& _data_indexVector_18[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
const int64_t cx [] = { 0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1 };
const int64_t cy [] = { 0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0 };
const int64_t cz [] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 };
const int invdir [] = { 0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13 };
const double weights [] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x];
}
}
}
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#ifdef __CUDACC__
#pragma pop
#endif
void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
{
auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
auto pointer = indexVectors->pointerGpu(type);
int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
if( indexVectorSize == 0)
return;
uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
double * _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
internal_boundary_UniformGridGPU_NoSlip::boundary_UniformGridGPU_NoSlip<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
}
void UniformGridGPU_NoSlip::operator() ( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::ALL, stream );
}
void UniformGridGPU_NoSlip::inner( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::INNER, stream );
}
void UniformGridGPU_NoSlip::outer( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::OUTER, stream );
}
} // namespace lbm
} // namespace walberla
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
#include "stencil/Directions.h"
#include "core/cell/CellInterval.h"
#include "cuda/GPUField.h"
#include "core/DataTypes.h"
#include "domain_decomposition/IBlock.h"
#include "cuda/communication/GeneratedGPUPackInfo.h"
#define FUNC_PREFIX __global__
namespace walberla {
namespace pystencils {
class UniformGridGPU_PackInfo : public ::walberla::cuda::GeneratedGPUPackInfo
{
public:
UniformGridGPU_PackInfo( BlockDataID pdfsID_ )
: pdfsID(pdfsID_)
{};
virtual ~UniformGridGPU_PackInfo() {}
virtual void pack (stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
virtual uint_t size (stencil::Direction dir, IBlock * block);
private:
BlockDataID pdfsID;
};
} // namespace pystencils
} // namespace walberla
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file UniformGridGPU_UBB.cpp
//! \\ingroup lbm
//! \\author lbmpy
//======================================================================================================================
#include <cmath>
#include "core/DataTypes.h"
#include "core/Macros.h"
#include "UniformGridGPU_UBB.h"
#include "cuda/ErrorChecking.h"
#define FUNC_PREFIX __global__
using namespace std;
namespace walberla {
namespace lbm {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#ifdef __CUDACC__
#pragma push
#pragma diag_suppress = declared_but_not_referenced
#endif
namespace internal_boundary_UniformGridGPU_UBB {
static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
{
if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
{
uint8_t * const _data_indexVector_10 = _data_indexVector;
const int32_t x = *((int32_t *)(& _data_indexVector_10[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
uint8_t * const _data_indexVector_14 = _data_indexVector + 4;
const int32_t y = *((int32_t *)(& _data_indexVector_14[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
uint8_t * const _data_indexVector_18 = _data_indexVector + 8;
const int32_t z = *((int32_t *)(& _data_indexVector_18[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
const int64_t cx [] = { 0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1 };
const int64_t cy [] = { 0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0 };
const int64_t cz [] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 };
const int invdir [] = { 0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13 };
const double weights [] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x];
}
}
}
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#ifdef __CUDACC__
#pragma pop
#endif
void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
{
auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
auto pointer = indexVectors->pointerGpu(type);
int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
if( indexVectorSize == 0)
return;
uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
double * _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
internal_boundary_UniformGridGPU_UBB::boundary_UniformGridGPU_UBB<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
}
void UniformGridGPU_UBB::operator() ( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::ALL, stream );
}
void UniformGridGPU_UBB::inner( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::INNER, stream );
}
void UniformGridGPU_UBB::outer( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::OUTER, stream );
}
} // namespace lbm
} // namespace walberla
This diff is collapsed.
......@@ -245,6 +245,23 @@ endfunction ( waLBerla_add_executable )
#######################################################################################################################
#
# Function to tell CMake which C/C++/CUDA files are generated by a python file
#
# Example:
# waLBerla_python_file_generates(MyPythonCodeGenScript.py Sweep1.cpp Sweep1.h Sweep2.h Sweep2.cu)
#
#
#######################################################################################################################
function( waLBerla_python_file_generates pythonFile )
get_filename_component(pythonFileAbsolutePath ${pythonFile} ABSOLUTE)
set( "WALBERLA_CODEGEN_INFO_${pythonFileAbsolutePath}" ${ARGN}
CACHE INTERNAL "Files generated by python script ${pythonFile}" FORCE)
endfunction(waLBerla_python_file_generates)
#######################################################################################################################
#
# Adds a waLBerla module test executable.
......
......@@ -40,11 +40,10 @@ function( handle_python_codegen sourceFilesOut generatedSourceFilesOut generator
if( ${sourceFile} MATCHES ".*\\.py$" )
set(codeGenRequired YES)
if( WALBERLA_BUILD_WITH_CODEGEN)
execute_process(COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} -l
OUTPUT_VARIABLE generatedSourceFiles)
string(REGEX REPLACE "\n$" "" generatedSourceFiles "${generatedSourceFiles}")
get_filename_component(pythonFileAbsolutePath ${sourceFile} ABSOLUTE )
set( generatedSourceFiles ${WALBERLA_CODEGEN_INFO_${pythonFileAbsolutePath}} )
set(generatedWithAbsolutePath )
set( generatedWithAbsolutePath )
foreach( filename ${generatedSourceFiles} )
list(APPEND generatedWithAbsolutePath ${CMAKE_CURRENT_BINARY_DIR}/${filename})
endforeach()
......@@ -52,9 +51,19 @@ function( handle_python_codegen sourceFilesOut generatedSourceFilesOut generator
list(APPEND generatedResult ${generatedWithAbsolutePath} )
list(APPEND generatorsResult ${sourceFile} )
string (REPLACE ";" "\", \"" jsonFileList "${generatedWithAbsolutePath}" )
set(pythonParameters
"{\"EXPECTED_FILES\": [\"${jsonFileList}\"], \"CMAKE_VARS\" : { "
"\"WALBERLA_OPTIMIZE_FOR_LOCALHOST\": \"${WALBERLA_OPTIMIZE_FOR_LOCALHOST}\","
"\"WALBERLA_DOUBLE_ACCURACY\": \"${WALBERLA_DOUBLE_ACCURACY}\","
"\"WALBERLA_BUILD_WITH_MPI\": \"${WALBERLA_BUILD_WITH_MPI}\","
"\"WALBERLA_BUILD_WITH_OPENMP\": \"${WALBERLA_BUILD_WITH_OPENMP}\" } }"
)
string(REPLACE "\"" "\\\"" pythonParameters ${pythonParameters}) # even one more quoting level required
string(REPLACE "\n" "" pythonParameters ${pythonParameters}) # remove newline characters
add_custom_command(OUTPUT ${generatedWithAbsolutePath}
DEPENDS ${sourceFile}
COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} -g
COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} ${pythonParameters}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
endif()
......
......@@ -19,6 +19,10 @@ waLBerla_execute_test( NAME SimpleKernelTest )
waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu )
waLBerla_execute_test( NAME FieldIndexing3DTest )
waLBerla_python_file_generates(codegen/CudaJacobiKernel.py
CudaJacobiKernel2D.cu CudaJacobiKernel2D.h
CudaJacobiKernel3D.cu CudaJacobiKernel3D.h)
waLBerla_compile_test( FILES codegen/CodegenJacobiGPU.cpp
codegen/CudaJacobiKernel.py
DEPENDS blockforest timeloop gui )
......@@ -34,8 +38,8 @@ waLBerla_compile_test( FILES CudaMPI DEPENDS blockforest timeloop gui )
waLBerla_compile_test( FILES AlignmentTest.cpp DEPENDS blockforest timeloop )
waLBerla_compile_test( FILES codegen/MicroBenchmarkGpuLbm.cpp codegen/MicroBenchmarkGpuLbm.py)
waLBerla_add_executable ( NAME CpuGpuGeneratedEquivalenceTest
FILES codegen/EquivalenceTest.cpp codegen/EquivalenceTest.gen.py
DEPENDS blockforest boundary core cuda field stencil timeloop vtk gui )
waLBerla_python_file_generates(codegen/MicroBenchmarkGpuLbm.py
MicroBenchmarkStreamKernel.cu MicroBenchmarkStreamKernel.h
MicroBenchmarkCopyKernel.cu MicroBenchmarkCopyKernel.h)
waLBerla_compile_test( FILES codegen/MicroBenchmarkGpuLbm.cpp codegen/MicroBenchmarkGpuLbm.py)
from pystencils_walberla.sweep import Sweep
import sympy as sp
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep
def jacobi2D(sweep):
src = sweep.field("f1")
dst = sweep.temporary_field(src)
with CodeGeneration() as ctx:
h = sp.symbols("h")
dst[0, 0] @= (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / (4 * sweep.constant("h") ** 2)
# ----- Jacobi 2D - created by specifying weights in nested list --------------------------
src, dst = ps.fields("src, src_tmp: [2D]")
stencil = [[0, -1, 0],
[-1, 4, -1],
[0, -1, 0]]
assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=4 * h**2)
generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu")
# ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
src, dst = ps.fields("src, src_tmp: [3D]")
def jacobi3D(sweep):
src = sweep.field("f1")
dst = sweep.temporary_field(src)
@ps.kernel
def kernel_func():
dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
src[0, 1, 0] + src[0, -1, 0] +
src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0, -1]) / \
(6 * sweep.constant("h") ** 2)
Sweep.generate('CudaJacobiKernel2D', jacobi2D, dim=2, target='gpu')
Sweep.generate('CudaJacobiKernel3D', jacobi3D, dim=3, target='gpu')
generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu")
#include "core/Environment.h"
#include "python_coupling/CreateConfig.h"
#include "blockforest/Initialization.h"
#include "lbm/field/PdfField.h"
#include "lbm/field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/AddToStorage.h"
#include "lbm/communication/PdfFieldPackInfo.h"
#include "lbm/PerformanceLogger.h"
#include "blockforest/communication/UniformBufferedScheme.h"