diff --git a/cmake/waLBerlaHelperFunctions.cmake b/cmake/waLBerlaHelperFunctions.cmake index df759b16241646ceb8b056866d31aa175982d0d4..02ef2aa007aace8de1aca86928cb819a21a905cf 100644 --- a/cmake/waLBerlaHelperFunctions.cmake +++ b/cmake/waLBerlaHelperFunctions.cmake @@ -33,20 +33,20 @@ function( handle_python_codegen sourceFilesOut codeGenRequiredOut ) set(codeGenRequired NO) foreach( sourceFile ${ARGN} ) if( ${sourceFile} MATCHES ".*\\.gen\\.py$" ) - get_filename_component(sourceFile ${sourceFile} NAME) - if( ${sourceFile} MATCHES ".*\\.cuda\\.gen\\.py$" ) - string(REPLACE ".cuda.gen.py" ".h" genHeaderFile ${sourceFile}) - string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFile}) + get_filename_component(sourceFileName ${sourceFile} NAME) + if( ${sourceFileName} MATCHES ".*\\.cuda\\.gen\\.py$" ) + string(REPLACE ".cuda.gen.py" ".h" genHeaderFile ${sourceFileName}) + string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFileName}) else() - string(REPLACE ".gen.py" ".h" genHeaderFile ${sourceFile}) - string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFile}) + string(REPLACE ".gen.py" ".h" genHeaderFile ${sourceFileName}) + string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFileName}) endif() list(APPEND result ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile} ${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile}) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile} ${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile} - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} + DEPENDS ${sourceFile} + COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(codeGenRequired YES) diff --git a/cmake/waLBerlaModuleDependencySystem.cmake b/cmake/waLBerlaModuleDependencySystem.cmake index 07b547551eb2be4d1d4ede08bffaf3d7d2f3f2af..45f7dbdc7f18e72de73c17e544867bb16590b3ad 100644 --- 
a/cmake/waLBerlaModuleDependencySystem.cmake +++ b/cmake/waLBerlaModuleDependencySystem.cmake @@ -7,11 +7,11 @@ # Here is an explanation of the waLBerla module mechanism: # - One folder with a CMakeLists.txt that is a subfolder of one of the directories listed in the variable # WALBERLA_MODULE_DIRS can be a module -# - the name of the module is the path relative to an WALBERLA_MODULE_DIRS entry +# - the name of the module is the path relative to a WALBERLA_MODULE_DIRS entry # - waLBerla modules are all placed in the src/ subdirectory, so WALBERLA_MODULE_DIRS contains ${waLBerla_SOURCE}/src/ # - to create a module call waLBerla_module() inside this folder -# - this creates a static library that has the same name as the module, but slashes are replaced by minuses -# in case the module contains only header files no static lib is generated, only a custom target is added +# - this creates a static library that has the same name as the module, but slashes are replaced by minuses. +# In case the module contains only header files no static lib is generated, only a custom target is added # to display the module in Visual Studio. # - waLBerla_module takes a list of dependent modules. A second list of dependencies is generated by parsing # all files in the module for corresponding "#include" lines. 
This mechanism is not a complete preprocessor diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt index 62e1c45c501d58967725551e5c39bf7b8086fb9e..364b8bbea1b3980955955b22d3795ffc4048a2f9 100644 --- a/tests/cuda/CMakeLists.txt +++ b/tests/cuda/CMakeLists.txt @@ -16,6 +16,11 @@ waLBerla_execute_test( NAME SimpleKernelTest ) waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu ) waLBerla_execute_test( NAME FieldIndexing3DTest ) +waLBerla_compile_test( FILES codegen/CodegenJacobiGPU.cpp + codegen/JacobiKernel2D.cuda.gen.py + codegen/JacobiKernel3D.cuda.gen.py + DEPENDS blockforest timeloop gui ) +waLBerla_execute_test( NAME CodegenJacobiGPU ) # The following tests work only for CUDA enabled MPI diff --git a/tests/cuda/codegen/CodegenJacobiGPU.cpp b/tests/cuda/codegen/CodegenJacobiGPU.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f81ecf1a6c2a2c91481b5203b3ecad99489865b8 --- /dev/null +++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp @@ -0,0 +1,189 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CodegenJacobiGPU.cpp +//! 
\author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "JacobiKernel2D.h" +#include "JacobiKernel3D.h" + +#include "cuda/HostFieldAllocator.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformDirectScheme.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" + +#include "cuda/HostFieldAllocator.h" +#include "cuda/FieldCopy.h" +#include "cuda/GPUField.h" +#include "cuda/Kernel.h" +#include "cuda/AddGPUFieldToStorage.h" +#include "cuda/communication/GPUPackInfo.h" +#include "cuda/FieldIndexing.h" + +#include "field/AddToStorage.h" +#include "field/communication/UniformMPIDatatypeInfo.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" +#include "stencil/D3Q7.h" + +#include "timeloop/SweepTimeloop.h" + + +using namespace walberla; + +typedef GhostLayerField<double,1> ScalarField; +typedef cuda::GPUField<double> GPUField; + + +ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new ScalarField ( + storage->getNumberOfXCells( *block ), // number of cells in x direction per block + storage->getNumberOfYCells( *block ), // number of cells in y direction per block + storage->getNumberOfZCells( *block ), // number of cells in z direction per block + 1, // one ghost layer + double(0), // initial value + field::fzyx, // layout + make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory + ); +} + +void testJacobi2D() +{ + uint_t xSize = 20; + uint_t ySize = 20; + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xSize, ySize, 
uint_t(1), // how many cells per block (x,y,z) + real_t(1), // dx: length of one cell in physical coordinates + false, // one block per process - "false" means all blocks to one process + true, true, true ); // periodic in x, y and z + + + BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); + BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); + + + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) + { + auto f = blockIt->getData<ScalarField>( cpuFieldID ); + for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y ) + for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x ) + f->get( x, y, 0 ) = 1.0; + } + + + + typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; + typedef cuda::communication::GPUPackInfo<GPUField> Packing; + + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(800); + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::JacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" ); + + + cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); + timeloop.run(); + cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField ); + + auto firstBlock = blocks->begin(); + auto f = firstBlock->getData<ScalarField>( cpuFieldID ); + WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0)); +} + + +void testJacobi3D() +{ + uint_t xSize = 12; + uint_t ySize = 12; + uint_t zSize = 12; + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xSize, ySize, zSize, // how many cells per block (x,y,z) + real_t(1), // dx: length of one cell in 
physical coordinates + false, // one block per process - "false" means all blocks to one process + true, true, true ); // periodic in x, y and z + + + BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); + BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); + + + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) + { + auto f = blockIt->getData<ScalarField>( cpuFieldID ); + for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2 ); ++z ) + for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y ) + for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x ) + f->get( x, y, z ) = 1.0; + } + + + + typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme; + typedef cuda::communication::GPUPackInfo<GPUField> Packing; + + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(800); + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::JacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" ); + + + cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); + timeloop.run(); + cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField ); + + auto firstBlock = blocks->begin(); + auto f = firstBlock->getData<ScalarField>( cpuFieldID ); + WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0)); +} + +int main( int argc, char ** argv ) +{ + mpi::Environment env( argc, argv ); + debug::enterTestMode(); + + testJacobi2D(); + testJacobi3D(); + + return 0; +} diff --git a/tests/cuda/codegen/JacobiKernel2D.cuda.gen.py b/tests/cuda/codegen/JacobiKernel2D.cuda.gen.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6fca69cd5702bb2ec856b312f4e52890f1ec64 --- /dev/null +++ 
b/tests/cuda/codegen/JacobiKernel2D.cuda.gen.py @@ -0,0 +1,12 @@ +from pystencils_walberla import Sweep + +k = Sweep(dim=2) + +src = k.field("f1") +dst = k.temporaryField(src) +h = k.constant("h") + +rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2) +k.addEq(dst[0,0], rhs) + +k.generate() diff --git a/tests/cuda/codegen/JacobiKernel3D.cuda.gen.py b/tests/cuda/codegen/JacobiKernel3D.cuda.gen.py new file mode 100644 index 0000000000000000000000000000000000000000..32ac6d17eff7cd85a103c38383e000f678ca04d9 --- /dev/null +++ b/tests/cuda/codegen/JacobiKernel3D.cuda.gen.py @@ -0,0 +1,12 @@ +from pystencils_walberla import Sweep + +k = Sweep(dim=3) + +src = k.field("f1") +dst = k.temporaryField(src) +h = k.constant("h") + +rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2) +k.addEq(dst[0,0,0], rhs) + +k.generate() diff --git a/tests/field/CMakeLists.txt b/tests/field/CMakeLists.txt index 5426bc964844211226ba076b73768ec81379f690..4307644a0dcbbdd7d803f6a91d91b8e02e1c869f 100644 --- a/tests/field/CMakeLists.txt +++ b/tests/field/CMakeLists.txt @@ -50,3 +50,11 @@ if( WALBERLA_BUILD_WITH_MPI ) endif( WALBERLA_BUILD_WITH_MPI ) + +# CodeGen Tests + +waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp codegen/JacobiKernel2D.gen.py codegen/JacobiKernel3D.gen.py + DEPENDS gui timeloop ) +waLBerla_execute_test( NAME CodegenJacobiCPU ) + + diff --git a/tests/field/codegen/CodegenJacobiCPU.cpp b/tests/field/codegen/CodegenJacobiCPU.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2023baeb9598fa2e27150adabca8832b19ff75ff --- /dev/null +++ b/tests/field/codegen/CodegenJacobiCPU.cpp @@ -0,0 +1,149 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. 
waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CodegenJacobiCPU.cpp +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "JacobiKernel2D.h" +#include "JacobiKernel3D.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformDirectScheme.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" + +#include "field/AddToStorage.h" +#include "field/communication/PackInfo.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" +#include "stencil/D3Q7.h" + +#include "timeloop/SweepTimeloop.h" + + +using namespace walberla; + +typedef GhostLayerField<double,1> ScalarField; + + +void testJacobi2D() +{ + uint_t xSize = 20; + uint_t ySize = 20; + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xSize, ySize, uint_t(1), // how many cells per block (x,y,z) + real_t(1), // dx: length of one cell in physical coordinates + false, // one block per process - "false" means all blocks to one process + true, true, true ); // periodic in x, y and z + + + BlockDataID fieldID = 
field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0)); + + // Initialize a quarter of the field with ones, the rest remains 0 + // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) + { + auto f = blockIt->getData<ScalarField>( fieldID ); + for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y ) + for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x ) + f->get( x, y, 0 ) = 1.0; + } + + typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; + typedef field::communication::PackInfo<ScalarField> Packing; + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(800); + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::JacobiKernel2D(fieldID, 1.0), "Jacobi Kernel" ); + + timeloop.run(); + + auto firstBlock = blocks->begin(); + auto f = firstBlock->getData<ScalarField>( fieldID ); + WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0)); +} + + +void testJacobi3D() +{ + uint_t xSize = 12; + uint_t ySize = 12; + uint_t zSize = 12; + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xSize, ySize, zSize, // how many cells per block (x,y,z) + real_t(1), // dx: length of one cell in physical coordinates + false, // one block per process - "false" means all blocks to one process + true, true, true ); // periodic in x, y and z + + + BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0)); + + // Initialize an eighth of the field with ones, the rest remains 0 + // Jacobi averages the domain -> every cell should be at 0.125 at 
sufficiently many timesteps + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) + { + auto f = blockIt->getData<ScalarField>( fieldID ); + for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2); ++z ) + for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y ) + for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x ) + f->get( x, y, z ) = 1.0; + } + + typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme; + typedef field::communication::PackInfo<ScalarField> Packing; + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(800); // number of timesteps for non-gui runs + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::JacobiKernel3D(fieldID, 1.0), "Jacobi Kernel" ); + + timeloop.run(); + + auto firstBlock = blocks->begin(); + auto f = firstBlock->getData<ScalarField>( fieldID ); + WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0)); +} + + +int main( int argc, char ** argv ) +{ + mpi::Environment env( argc, argv ); + debug::enterTestMode(); + + testJacobi2D(); + testJacobi3D(); + + return 0; +} diff --git a/tests/field/codegen/JacobiKernel2D.gen.py b/tests/field/codegen/JacobiKernel2D.gen.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6fca69cd5702bb2ec856b312f4e52890f1ec64 --- /dev/null +++ b/tests/field/codegen/JacobiKernel2D.gen.py @@ -0,0 +1,12 @@ +from pystencils_walberla import Sweep + +k = Sweep(dim=2) + +src = k.field("f1") +dst = k.temporaryField(src) +h = k.constant("h") + +rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2) +k.addEq(dst[0,0], rhs) + +k.generate() diff --git a/tests/field/codegen/JacobiKernel3D.gen.py b/tests/field/codegen/JacobiKernel3D.gen.py new file mode 100644 index 
0000000000000000000000000000000000000000..32ac6d17eff7cd85a103c38383e000f678ca04d9 --- /dev/null +++ b/tests/field/codegen/JacobiKernel3D.gen.py @@ -0,0 +1,12 @@ +from pystencils_walberla import Sweep + +k = Sweep(dim=3) + +src = k.field("f1") +dst = k.temporaryField(src) +h = k.constant("h") + +rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2) +k.addEq(dst[0,0,0], rhs) + +k.generate()