diff --git a/tests/cuda/codegen/CodegenJacobiGPU.cpp b/tests/cuda/codegen/CodegenJacobiGPU.cpp index d2e719e5a23e0197d83b3f5f929d38960bee592b..824ba1f644be17d545762be270f03fa1471e1f68 100644 --- a/tests/cuda/codegen/CodegenJacobiGPU.cpp +++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp @@ -109,7 +109,7 @@ void testJacobi2D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::CudaJacobiKernel2D(gpuField, 2.0), "Jacobi Kernel" ); + << Sweep( pystencils::CudaJacobiKernel2D(gpuField), "Jacobi Kernel" ); cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); @@ -163,7 +163,7 @@ void testJacobi3D() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::CudaJacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" ); + << Sweep( pystencils::CudaJacobiKernel3D(gpuField), "Jacobi Kernel" ); cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID ); diff --git a/tests/cuda/codegen/CodegenPoissonGPU.cpp b/tests/cuda/codegen/CodegenPoissonGPU.cpp new file mode 100644 index 0000000000000000000000000000000000000000..166e8793ced3fa6b31a9f52e08dd182729573fd4 --- /dev/null +++ b/tests/cuda/codegen/CodegenPoissonGPU.cpp @@ -0,0 +1,151 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file PoissonGpu.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +// +//====================================================================================================================== + +#include "PoissonGPU.h" + +#include "cuda/HostFieldAllocator.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformDirectScheme.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Environment.h" +#include "core/debug/TestSubsystem.h" +#include "core/math/Constants.h" + +#include "cuda/HostFieldAllocator.h" +#include "cuda/FieldCopy.h" +#include "cuda/GPUField.h" +#include "cuda/Kernel.h" +#include "cuda/AddGPUFieldToStorage.h" +#include "cuda/communication/GPUPackInfo.h" +#include "cuda/FieldIndexing.h" + +#include "field/AddToStorage.h" +#include "field/communication/UniformMPIDatatypeInfo.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" + +#include "timeloop/SweepTimeloop.h" + + +using namespace walberla; + +typedef GhostLayerField<real_t, 1> ScalarField_T; +typedef cuda::GPUField<real_t> GPUField; + + +// U with Dirichlet Boundary +void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + if( blocks->atDomainYMaxBorder( *block ) ) + { + ScalarField_T * src = block->getData< ScalarField_T >( srcId ); + CellInterval xyz = src->xyzSizeWithGhostLayer(); + xyz.yMin() = xyz.yMax(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell ); + src->get( *cell ) = std::sin(math::pi * p[0] ) * std::sinh(math::pi * p[1] ); + } + } + } +} + +// right hand side +void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + ScalarField_T * f = block->getData< ScalarField_T >( fId ); + CellInterval xyz = f->xyzSize(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell ); + f->get( *cell ) = real_t(4) * math::pi * math::pi * std::sin( real_t(2) * math::pi * p[0] ) * std::sinh( real_t(2) * math::pi * p[1] ); + } + } +} + +void testPoisson() +{ + const uint_t xCells = uint_t(200); + const uint_t yCells = uint_t(100); + const real_t xSize = real_t(2); + const real_t ySize = real_t(1); + const real_t dx = xSize / real_c( xCells + uint_t(1) ); + const real_t dy = ySize / real_c( yCells + uint_t(1) ); + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0), + xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ), + uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction + xCells, yCells, uint_t(1), // how many cells per block (x,y,z) + false, // one block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + BlockDataID cpuFieldID = field::addToStorage< ScalarField_T >( blocks, "CPU Field src", real_t(0.0) ); + BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpuFieldID, "GPU Field src" ); + initU( blocks, cpuFieldID ); + + BlockDataID cpufId = field::addToStorage< ScalarField_T >( blocks, "CPU Field f", real_t(0.0)); + BlockDataID gpufId = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpufId, "GPU Field f" ); + initF( blocks, cpufId ); + + + typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; + typedef cuda::communication::GPUPackInfo<GPUField> Packing; + + CommScheme commScheme(blocks); + commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(10000); + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( commScheme, "Communication" ) + << Sweep( pystencils::PoissonGPU(gpufId, gpuField, dx, dy), "Jacobi Kernel" ); + + + cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpuField, cpuFieldID ); + cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpufId, cpufId ); + timeloop.run(); + cuda::fieldCpy<ScalarField_T, GPUField>( blocks, cpuFieldID, gpuField ); +} + + +int main( int argc, char ** argv ) +{ + mpi::Environment env( argc, argv ); + debug::enterTestMode(); + + testPoisson(); + + return 0; +} diff --git a/tests/cuda/codegen/CudaJacobiKernel.py b/tests/cuda/codegen/CudaJacobiKernel.py index 9d1362066c7684c2ca6af5de48d4dc552097d054..f8713d1d13e8336b9ccd0212d5f146067ea38e21 100644 --- a/tests/cuda/codegen/CudaJacobiKernel.py +++ b/tests/cuda/codegen/CudaJacobiKernel.py @@ -1,26 +1,25 @@ +import numpy as np import sympy as sp import pystencils as ps from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: - h = sp.symbols("h") - - # ----- Jacobi 2D - created by specifying weights in nested list -------------------------- + # ----- Stencil 2D - created by specifying weights in nested list -------------------------- src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx') - stencil = [[0, 4, 0], - [4, 0, 4], - [0, 4, 0]] - assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h ** 2)) + stencil = [[1.11, 2.22, 3.33], + [4.44, 5.55, 6.66], + [7.77, 8.88, 9.99]] + assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil)) generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu") - # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format ----- + # ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format ----- src, dst = ps.fields("src, src_tmp: [3D]") @ps.kernel def kernel_func(): - dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + - src[0, 1, 0] + src[0, -1, 0] + - src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2) + dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] + + 5 * src[0, 1, 0] + 6 * src[0, -1, 0] + + 7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33 generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu") diff --git a/tests/cuda/codegen/CudaPoisson.py b/tests/cuda/codegen/CudaPoisson.py new file mode 100644 index 0000000000000000000000000000000000000000..011b41494099e691642bcf3788ec8aef401a17bb --- /dev/null +++ b/tests/cuda/codegen/CudaPoisson.py @@ -0,0 +1,19 @@ +import numpy as np +import sympy as sp +import pystencils as ps +from pystencils_walberla import CodeGeneration, generate_sweep + + +with CodeGeneration() as ctx: + # ----- Solving the 2D Poisson equation with rhs -------------------------- + dx = sp.Symbol("dx") + dy = sp.Symbol("dy") + src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx') + + @ps.kernel + def kernel_func(): + dst[0, 0] @= ((dx**2 * (src[1, 0] + src[-1, 0])) + + (dy**2 * (src[0, 1] + src[0, -1])) - + (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2)) + + generate_sweep(ctx, 'PoissonGPU', kernel_func, field_swaps=[(src, dst)], target='gpu') diff --git a/tests/field/codegen/CodegenPoisson.cpp b/tests/field/codegen/CodegenPoisson.cpp index c390831b649d68a075eb539333fdd8cfe3b4236f..52b93bbbdeab9ecb6f735e2732da678b8c249372 100644 --- a/tests/field/codegen/CodegenPoisson.cpp +++ b/tests/field/codegen/CodegenPoisson.cpp @@ -20,65 +20,102 @@ #include "Poisson.h" #include "blockforest/Initialization.h" -#include "blockforest/communication/UniformDirectScheme.h" #include "blockforest/communication/UniformBufferedScheme.h" #include "core/Environment.h" #include "core/debug/TestSubsystem.h" +#include "core/math/Constants.h" #include "field/AddToStorage.h" #include "field/communication/PackInfo.h" +#include "field/vtk/VTKWriter.h" #include "gui/Gui.h" #include "stencil/D2Q9.h" -#include "stencil/D3Q7.h" - #include "timeloop/SweepTimeloop.h" +#include "vtk/VTKOutput.h" + using namespace walberla; -typedef GhostLayerField<double,1> ScalarField; +typedef GhostLayerField<real_t, 1> ScalarField_T; + +// U with Dirichlet Boundary +void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + if( blocks->atDomainYMaxBorder( *block ) ) + { + ScalarField_T * src = block->getData< ScalarField_T >( srcId ); + CellInterval xyz = src->xyzSizeWithGhostLayer(); + xyz.yMin() = xyz.yMax(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell ); + src->get( *cell ) = std::sin( math::pi * p[0] ) * std::sinh( math::pi * p[1] ); + } + } + } +} +// right hand side +void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId ) +{ + for( auto block = blocks->begin(); block != blocks->end(); ++block ) + { + ScalarField_T * f = block->getData< ScalarField_T >( fId ); + CellInterval xyz = f->xyzSize(); + for( auto cell = xyz.begin(); cell != xyz.end(); ++cell ) + { + const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell ); + f->get( *cell ) = real_t(4) * math::pi * math::pi * std::sin( real_t(2) * math::pi * p[0] ) * std::sinh( real_t(2) * math::pi * p[1] ); + } + } +} void testPoisson() { - uint_t L0 = 50; - uint_t xSize = L0; - uint_t ySize = L0; - double h = 1 / (double(L0) + 1); - double rhs = 1.0; + const uint_t xCells = uint_t(200); + const uint_t yCells = uint_t(100); + const real_t xSize = real_t(2); + const real_t ySize = real_t(1); + const real_t dx = xSize / real_c( xCells + uint_t(1) ); + const real_t dy = ySize / real_c( yCells + uint_t(1) ); + // Create blocks shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0), + xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ), uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction - xSize, ySize, uint_t(1), // how many cells per block (x,y,z) - real_t(1), // dx: length of one cell in physical coordinates + xCells, yCells, uint_t(1), // how many cells per block (x,y,z) false, // one block per process - "false" means all blocks to one process false, false, false ); // no periodicity - BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0)); + BlockDataID fieldID = field::addToStorage<ScalarField_T>(blocks, "Field", real_t(0.0)); + initU( blocks, fieldID ); + + BlockDataID fId = field::addToStorage< ScalarField_T >( blocks, "f", real_t(0.0)); + initF( blocks, fId ); typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme; - typedef field::communication::PackInfo<ScalarField> Packing; + typedef field::communication::PackInfo<ScalarField_T> Packing; CommScheme commScheme(blocks); commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) ); // Create Timeloop - const uint_t numberOfTimesteps = uint_t(2500); + const uint_t numberOfTimesteps = uint_t(10000); SweepTimeloop timeloop ( blocks, numberOfTimesteps ); // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::Poisson(fieldID, h, rhs), "Poisson Kernel" ); + << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" ); timeloop.run(); - - auto firstBlock = blocks->begin(); - auto f = firstBlock->getData<ScalarField>( fieldID ); - WALBERLA_CHECK_FLOAT_EQUAL(f->get(int(L0/2), int(L0/2), 0), real_t(7.28886456002774547e-02)); } @@ -89,5 +126,5 @@ int main( int argc, char ** argv ) testPoisson(); - return 0; + return EXIT_SUCCESS; } diff --git a/tests/field/codegen/JacobiKernel.py b/tests/field/codegen/JacobiKernel.py index c135ded28942cd897e39faed21b259b8073e2adb..3421c8cdd3fc6a0fb88e92ab479fbb2b768a7648 100644 --- a/tests/field/codegen/JacobiKernel.py +++ b/tests/field/codegen/JacobiKernel.py @@ -6,9 +6,9 @@ from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: # ----- Stencil 2D - created by specifying weights in nested list -------------------------- src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx') - stencil = [[1, 2, 3], - [4, 5, 6], - [7, 8, 9]] + stencil = [[1.11, 2.22, 3.33], + [4.44, 5.55, 6.66], + [7.77, 8.88, 9.99]] assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil)) generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)]) diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py index 9cf604064eac2e8feff950e841642f9cf9b31e3d..df3e9589e7ddb7d6a8101985f76fcd4ab8fddd2d 100644 --- a/tests/field/codegen/Poisson.py +++ b/tests/field/codegen/Poisson.py @@ -4,13 +4,15 @@ from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: - # ----- Solving the 2D Poisson equation with constant rhs -------------------------- - rhs = sp.Symbol("rhs") - h = sp.Symbol("h") - src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx') + # ----- Solving the 2D Poisson equation with rhs -------------------------- + dx = sp.Symbol("dx") + dy = sp.Symbol("dy") + src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx') @ps.kernel def kernel_func(): - dst[0, 0] @= (src[1, 0] + src[-1, 0] + - src[0, 1] + src[0, -1]) / 4 + ((rhs * h**2) / 4) + dst[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) + + (dx**2 * (src[0, 1] + src[0, -1])) - + (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2)) + generate_sweep(ctx, 'Poisson', kernel_func, field_swaps=[(src, dst)])