diff --git a/tests/cuda/codegen/CodegenJacobiGPU.cpp b/tests/cuda/codegen/CodegenJacobiGPU.cpp index a8ebd370e44f6304ec2f0f6c17c8028b65ff10e1..824ba1f644be17d545762be270f03fa1471e1f68 100644 --- a/tests/cuda/codegen/CodegenJacobiGPU.cpp +++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp @@ -78,7 +78,7 @@ void testJacobi2D() // Create blocks shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction - xSize, ySize, uint_t(1), // how many cells per block (x,y,z) + xSize, ySize, uint_t(1), // how many cells per block (x,y,z) real_t(1), // dx: length of one cell in physical coordinates false, // one block per process - "false" means all blocks to one process true, true, true ); // no periodicity @@ -87,7 +87,8 @@ void testJacobi2D() BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); - + // Initialize a quarter of the field with ones, the rest remains 0 + // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { auto f = blockIt->getData<ScalarField>( cpuFieldID ); @@ -96,8 +97,6 @@ void testJacobi2D() f->get( x, y, 0 ) = 1.0; } - - typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; typedef cuda::communication::GPUPackInfo<GPUField> Packing; @@ -110,7 +109,7 @@ void testJacobi2D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::CudaJacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" ); + << Sweep( pystencils::CudaJacobiKernel2D(gpuField), "Jacobi Kernel" ); cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); @@ -141,7 +140,8 @@ void testJacobi3D() BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" ); BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); - + // Initialize a quarter of the field with ones, the rest remains 0 + // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { auto f = blockIt->getData<ScalarField>( cpuFieldID ); @@ -151,8 +151,6 @@ void testJacobi3D() f->get( x, y, z ) = 1.0; } - - typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme; typedef cuda::communication::GPUPackInfo<GPUField> Packing; @@ -165,7 +163,7 @@ void testJacobi3D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::CudaJacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" ); + << Sweep( pystencils::CudaJacobiKernel3D(gpuField), "Jacobi Kernel" ); cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); diff --git a/tests/cuda/codegen/CodegenPoissonGPU.cpp b/tests/cuda/codegen/CodegenPoissonGPU.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fd88bf1fb0a7749921eb48fc9b58006a05d0128d --- /dev/null +++ b/tests/cuda/codegen/CodegenPoissonGPU.cpp @@ -0,0 +1,154 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PoissonGpu.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#include "PoissonGPU.h" + +#include "cuda/HostFieldAllocator.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformDirectScheme.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" +#include "core/math/Constants.h" + +#include "cuda/HostFieldAllocator.h" +#include "cuda/FieldCopy.h" +#include "cuda/GPUField.h" +#include "cuda/Kernel.h" +#include "cuda/AddGPUFieldToStorage.h" +#include "cuda/communication/GPUPackInfo.h" +#include "cuda/FieldIndexing.h" + +#include "field/AddToStorage.h" +#include "field/communication/UniformMPIDatatypeInfo.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" + +#include "timeloop/SweepTimeloop.h" + + +using namespace walberla; + +typedef GhostLayerField<real_t, 1> ScalarField_T; +typedef cuda::GPUField<real_t> GPUField; + + +// U with Dirichlet Boundary +void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + if( blocks->atDomainYMaxBorder( *block ) ) + { + ScalarField_T * src = block->getData< ScalarField_T >( srcId ); + CellInterval xyz = src->xyzSizeWithGhostLayer(); + xyz.yMin() = xyz.yMax(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell ); + src->get( *cell ) = std::sin(math::pi * p[0] ) * std::sinh(math::pi * p[1] ); + } + } + } +} + +// right hand side +void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + ScalarField_T * f = block->getData< ScalarField_T >( fId ); + CellInterval xyz = f->xyzSize(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + f->get( *cell ) = 0.0; + } + } +} + +void testPoisson() +{ + const uint_t xCells = uint_t(200); + const uint_t yCells = uint_t(100); + const real_t xSize = real_t(2); + const real_t ySize = real_t(1); + const real_t dx = xSize / real_c( xCells + uint_t(1) ); + const real_t dy = ySize / real_c( yCells + uint_t(1) ); + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0), + xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ), + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xCells, yCells, uint_t(1), // how many cells per block (x,y,z) + false, // one block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + BlockDataID cpuFieldID = field::addToStorage< ScalarField_T >( blocks, "CPU Field src", real_t(0.0) ); + BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpuFieldID, "GPU Field src" ); + initU( blocks, cpuFieldID ); + + BlockDataID cpufId = field::addToStorage< ScalarField_T >( blocks, "CPU Field f", real_t(0.0)); + BlockDataID gpufId = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpufId, "GPU Field f" ); + initF( blocks, cpufId ); + + + typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; + typedef cuda::communication::GPUPackInfo<GPUField> Packing; + + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(10000); + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::PoissonGPU(gpufId, gpuField, dx, dy), "Jacobi Kernel" ); + + + cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpuField, cpuFieldID ); + cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpufId, cpufId ); + timeloop.run(); + cuda::fieldCpy<ScalarField_T, GPUField>( blocks, cpuFieldID, gpuField ); + + auto firstBlock = blocks->begin(); + auto f = firstBlock->getData<ScalarField_T>( cpuFieldID ); + WALBERLA_CHECK_LESS(f->get(50,99,0) - std::sin( math::pi * 0.5 ) * std::sinh( math::pi * 0.99 ), 0.01); +} + + +int main( int argc, char ** argv ) +{ + mpi::Environment env( argc, argv ); + debug::enterTestMode(); + + testPoisson(); + + return 0; +} diff --git a/tests/cuda/codegen/CudaJacobiKernel.py b/tests/cuda/codegen/CudaJacobiKernel.py index 7ec84032a5c7941ddfee67f1484a08dca2014193..f8713d1d13e8336b9ccd0212d5f146067ea38e21 100644 --- a/tests/cuda/codegen/CudaJacobiKernel.py +++ b/tests/cuda/codegen/CudaJacobiKernel.py @@ -1,26 +1,25 @@ +import numpy as np import sympy as sp import pystencils as ps from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: - h = sp.symbols("h") - - # ----- Jacobi 2D - created by specifying weights in nested list -------------------------- - src, dst = ps.fields("src, src_tmp: [2D]") - stencil = [[0, -1, 0], - [-1, 4, -1], - [0, -1, 0]] - assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=4 * h**2) + # ----- Stencil 2D - created by specifying weights in nested list -------------------------- + src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx') + stencil = [[1.11, 2.22, 3.33], + [4.44, 5.55, 6.66], + [7.77, 8.88, 9.99]] + assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil)) generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu") - # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format ----- + # ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format ----- src, dst = ps.fields("src, src_tmp: [3D]") @ps.kernel def kernel_func(): - dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + - src[0, 1, 0] + src[0, -1, 0] + - src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2) + dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] + + 5 * src[0, 1, 0] + 6 * src[0, -1, 0] + + 7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33 generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu") diff --git a/tests/cuda/codegen/CudaPoisson.py b/tests/cuda/codegen/CudaPoisson.py new file mode 100644 index 0000000000000000000000000000000000000000..53752ee96d463a4021e2c97c24d694fc9887217a --- /dev/null +++ b/tests/cuda/codegen/CudaPoisson.py @@ -0,0 +1,19 @@ +import numpy as np +import sympy as sp +import pystencils as ps +from pystencils_walberla import CodeGeneration, generate_sweep + + +with CodeGeneration() as ctx: + # ----- Solving the 2D Poisson equation with rhs -------------------------- + dx = sp.Symbol("dx") + dy = sp.Symbol("dy") + src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx') + + @ps.kernel + def kernel_func(): + src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) + + (dx**2 * (src[0, 1] + src[0, -1])) - + (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2)) + + generate_sweep(ctx, 'PoissonGPU', kernel_func, target='gpu') diff --git a/tests/field/CMakeLists.txt b/tests/field/CMakeLists.txt index bcb806a0d899c2e0dad75ed0f42b317bff5733e0..c0091537003048ed7f135181b7a96d55b7a3f15d 100644 --- a/tests/field/CMakeLists.txt +++ b/tests/field/CMakeLists.txt @@ -68,4 +68,9 @@ waLBerla_generate_target_from_python(NAME CodegenJacobiCPUGeneratedJacobiKernel waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp DEPENDS gui timeloop CodegenJacobiCPUGeneratedJacobiKernel) waLBerla_execute_test( NAME CodegenJacobiCPU ) +waLBerla_generate_target_from_python(NAME CodegenPoissonGeneratedKernel FILE codegen/Poisson.py + OUT_FILES Poisson.cpp Poisson.h ) +waLBerla_compile_test( FILES codegen/CodegenPoisson.cpp DEPENDS gui timeloop CodegenPoissonGeneratedKernel) +waLBerla_execute_test( NAME CodegenPoisson ) + diff --git a/tests/field/codegen/CodegenJacobiCPU.cpp b/tests/field/codegen/CodegenJacobiCPU.cpp index aefaf67e2b0152621dad007e3a4c7dfa2422fcde..20bc8b06116b9437de17e5967bf44e83df4100fb 100644 --- a/tests/field/codegen/CodegenJacobiCPU.cpp +++ b/tests/field/codegen/CodegenJacobiCPU.cpp @@ -79,12 +79,13 @@ void testJacobi2D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::JacobiKernel2D(fieldID, 1.0), "Jacobi Kernel" ); + << Sweep( pystencils::JacobiKernel2D(fieldID), "Jacobi Kernel" ); timeloop.run(); auto firstBlock = blocks->begin(); auto f = firstBlock->getData<ScalarField>( fieldID ); + WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0)); } @@ -127,7 +128,7 @@ void testJacobi3D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::JacobiKernel3D(fieldID, 1.0), "Jacobi Kernel" ); + << Sweep( pystencils::JacobiKernel3D(fieldID), "Jacobi Kernel" ); timeloop.run(); diff --git a/tests/field/codegen/CodegenPoisson.cpp b/tests/field/codegen/CodegenPoisson.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f5137375a14eecb4bf79e007bef72bc1bbf089f3 --- /dev/null +++ b/tests/field/codegen/CodegenPoisson.cpp @@ -0,0 +1,133 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CodegenPoisson.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#include "Poisson.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" +#include "core/math/Constants.h" + +#include "field/AddToStorage.h" +#include "field/communication/PackInfo.h" +#include "field/vtk/VTKWriter.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" +#include "timeloop/SweepTimeloop.h" + +#include "vtk/VTKOutput.h" + + +using namespace walberla; + +typedef GhostLayerField<real_t, 1> ScalarField_T; + +// U with Dirichlet Boundary +void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + if( blocks->atDomainYMaxBorder( *block ) ) + { + ScalarField_T * src = block->getData< ScalarField_T >( srcId ); + CellInterval xyz = src->xyzSizeWithGhostLayer(); + xyz.yMin() = xyz.yMax(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell ); + src->get( *cell ) = std::sin( math::pi * p[0] ) * std::sinh( math::pi * p[1] ); + } + } + } +} + +// right hand side +void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + ScalarField_T * f = block->getData< ScalarField_T >( fId ); + CellInterval xyz = f->xyzSize(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + f->get( *cell ) = 0.0; + } + } +} + +void testPoisson() +{ + + const uint_t xCells = uint_t(100); + const uint_t yCells = uint_t(100); + const real_t xSize = real_t(1); + const real_t ySize = real_t(1); + const real_t dx = xSize / real_c( xCells + uint_t(1) ); + const real_t dy = ySize / real_c( yCells + uint_t(1) ); + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0), + xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ), + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xCells, yCells, uint_t(1), // how many cells per block (x,y,z) + false, // one block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + BlockDataID fieldID = field::addToStorage<ScalarField_T>(blocks, "Field", real_t(0.0)); + initU( blocks, fieldID ); + + BlockDataID fId = field::addToStorage< ScalarField_T >( blocks, "f", real_t(0.0)); + initF( blocks, fId ); + + typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; + typedef field::communication::PackInfo<ScalarField_T> Packing; + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(10000); + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" ); + + timeloop.run(); + + auto firstBlock = blocks->begin(); + auto f = firstBlock->getData<ScalarField_T>( fieldID ); + WALBERLA_CHECK_LESS(f->get(50,99,0) - std::sin( math::pi * 0.5 ) * std::sinh( math::pi * 0.99 ), 0.01); +} + + +int main( int argc, char ** argv ) +{ + mpi::Environment env( argc, argv ); + debug::enterTestMode(); + + testPoisson(); + + return EXIT_SUCCESS; +} diff --git a/tests/field/codegen/JacobiKernel.py b/tests/field/codegen/JacobiKernel.py index b2da5369dddee8223a3035bff10174d27f52fe70..3421c8cdd3fc6a0fb88e92ab479fbb2b768a7648 100644 --- a/tests/field/codegen/JacobiKernel.py +++ b/tests/field/codegen/JacobiKernel.py @@ -1,26 +1,24 @@ -import sympy as sp +import numpy as np import pystencils as ps from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: - h = sp.symbols("h") - - # ----- Jacobi 2D - created by specifying weights in nested list -------------------------- + # ----- Stencil 2D - created by specifying weights in nested list -------------------------- src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx') - stencil = [[0, 1, 0], - [1, 0, 1], - [0, 1, 0]] - assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h ** 2)) + stencil = [[1.11, 2.22, 3.33], + [4.44, 5.55, 6.66], + [7.77, 8.88, 9.99]] + assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil)) generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)]) - # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format ----- - src, dst = ps.fields("src, src_tmp: [3D]") + # ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format ----- + src, dst = ps.fields("src, src_tmp: [3D]", layout='fzyx') @ps.kernel def kernel_func(): - dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + - src[0, 1, 0] + src[0, -1, 0] + - src[0, 0, 1] + src[0, 0, -1]) / (h ** 2 * 6) + dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] + + 5 * src[0, 1, 0] + 6 * src[0, -1, 0] + + 7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33 generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)]) diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py new file mode 100644 index 0000000000000000000000000000000000000000..6edec0315778aa2140303fc1158525a9344eb3dd --- /dev/null +++ b/tests/field/codegen/Poisson.py @@ -0,0 +1,18 @@ +import sympy as sp +import pystencils as ps +from pystencils_walberla import CodeGeneration, generate_sweep + + +with CodeGeneration() as ctx: + # ----- Solving the 2D Poisson equation with rhs -------------------------- + dx = sp.Symbol("dx") + dy = sp.Symbol("dy") + src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx') + + @ps.kernel + def kernel_func(): + src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) + + (dx**2 * (src[0, 1] + src[0, -1])) - + (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2)) + + generate_sweep(ctx, 'Poisson', kernel_func)