Skip to content
Snippets Groups Projects
Commit 84cb3ddc authored by Markus Holzer's avatar Markus Holzer
Browse files

added new Poisson test case

parent d3f09665
No related merge requests found
......@@ -109,7 +109,7 @@ void testJacobi2D()
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::CudaJacobiKernel2D(gpuField, 2.0), "Jacobi Kernel" );
<< Sweep( pystencils::CudaJacobiKernel2D(gpuField), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
......@@ -163,7 +163,7 @@ void testJacobi3D()
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::CudaJacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" );
<< Sweep( pystencils::CudaJacobiKernel3D(gpuField), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file PoissonGpu.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "PoissonGPU.h"
#include "cuda/HostFieldAllocator.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "core/math/Constants.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/FieldCopy.h"
#include "cuda/GPUField.h"
#include "cuda/Kernel.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/FieldIndexing.h"
#include "field/AddToStorage.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
typedef GhostLayerField<real_t, 1> ScalarField_T;
typedef cuda::GPUField<real_t> GPUField;
// U with Dirichlet Boundary
void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId )
{
for( auto block = blocks->begin(); block != blocks->end(); ++block )
{
if( blocks->atDomainYMaxBorder( *block ) )
{
ScalarField_T * src = block->getData< ScalarField_T >( srcId );
CellInterval xyz = src->xyzSizeWithGhostLayer();
xyz.yMin() = xyz.yMax();
for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
{
const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
src->get( *cell ) = std::sin(math::pi * p[0] ) * std::sinh(math::pi * p[1] );
}
}
}
}
// right hand side
void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId )
{
for( auto block = blocks->begin(); block != blocks->end(); ++block )
{
ScalarField_T * f = block->getData< ScalarField_T >( fId );
CellInterval xyz = f->xyzSize();
for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
{
const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
f->get( *cell ) = real_t(4) * math::pi * math::pi * std::sin( real_t(2) * math::pi * p[0] ) * std::sinh( real_t(2) * math::pi * p[1] );
}
}
}
void testPoisson()
{
const uint_t xCells = uint_t(200);
const uint_t yCells = uint_t(100);
const real_t xSize = real_t(2);
const real_t ySize = real_t(1);
const real_t dx = xSize / real_c( xCells + uint_t(1) );
const real_t dy = ySize / real_c( yCells + uint_t(1) );
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xCells, yCells, uint_t(1), // how many cells per block (x,y,z)
false, // one block per process - "false" means all blocks to one process
false, false, false ); // no periodicity
BlockDataID cpuFieldID = field::addToStorage< ScalarField_T >( blocks, "CPU Field src", real_t(0.0) );
BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpuFieldID, "GPU Field src" );
initU( blocks, cpuFieldID );
BlockDataID cpufId = field::addToStorage< ScalarField_T >( blocks, "CPU Field f", real_t(0.0));
BlockDataID gpufId = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpufId, "GPU Field f" );
initF( blocks, cpufId );
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
typedef cuda::communication::GPUPackInfo<GPUField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(10000);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::PoissonGPU(gpufId, gpuField, dx, dy), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpuField, cpuFieldID );
cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpufId, cpufId );
timeloop.run();
cuda::fieldCpy<ScalarField_T, GPUField>( blocks, cpuFieldID, gpuField );
}
int main( int argc, char ** argv )
{
mpi::Environment env( argc, argv );
debug::enterTestMode();
testPoisson();
return 0;
}
import numpy as np
import sympy as sp
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep
with CodeGeneration() as ctx:
h = sp.symbols("h")
# ----- Jacobi 2D - created by specifying weights in nested list --------------------------
# ----- Stencil 2D - created by specifying weights in nested list --------------------------
src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
stencil = [[0, 4, 0],
[4, 0, 4],
[0, 4, 0]]
assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h ** 2))
stencil = [[1.11, 2.22, 3.33],
[4.44, 5.55, 6.66],
[7.77, 8.88, 9.99]]
assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil))
generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu")
# ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
# ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format -----
src, dst = ps.fields("src, src_tmp: [3D]")
@ps.kernel
def kernel_func():
dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
src[0, 1, 0] + src[0, -1, 0] +
src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] +
5 * src[0, 1, 0] + 6 * src[0, -1, 0] +
7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33
generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu")
import numpy as np
import sympy as sp
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep
with CodeGeneration() as ctx:
# ----- Solving the 2D Poisson equation with rhs --------------------------
dx = sp.Symbol("dx")
dy = sp.Symbol("dy")
src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx')
@ps.kernel
def kernel_func():
dst[0, 0] @= ((dx**2 * (src[1, 0] + src[-1, 0])) +
(dy**2 * (src[0, 1] + src[0, -1])) -
(rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
generate_sweep(ctx, 'PoissonGPU', kernel_func, field_swaps=[(src, dst)], target='gpu')
......@@ -20,65 +20,102 @@
#include "Poisson.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "core/math/Constants.h"
#include "field/AddToStorage.h"
#include "field/communication/PackInfo.h"
#include "field/vtk/VTKWriter.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "stencil/D3Q7.h"
#include "timeloop/SweepTimeloop.h"
#include "vtk/VTKOutput.h"
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
typedef GhostLayerField<real_t, 1> ScalarField_T;
// U with Dirichlet Boundary
void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId )
{
for( auto block = blocks->begin(); block != blocks->end(); ++block )
{
if( blocks->atDomainYMaxBorder( *block ) )
{
ScalarField_T * src = block->getData< ScalarField_T >( srcId );
CellInterval xyz = src->xyzSizeWithGhostLayer();
xyz.yMin() = xyz.yMax();
for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
{
const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
src->get( *cell ) = std::sin( math::pi * p[0] ) * std::sinh( math::pi * p[1] );
}
}
}
}
// right hand side
void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId )
{
for( auto block = blocks->begin(); block != blocks->end(); ++block )
{
ScalarField_T * f = block->getData< ScalarField_T >( fId );
CellInterval xyz = f->xyzSize();
for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
{
const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
f->get( *cell ) = real_t(4) * math::pi * math::pi * std::sin( real_t(2) * math::pi * p[0] ) * std::sinh( real_t(2) * math::pi * p[1] );
}
}
}
void testPoisson()
{
uint_t L0 = 50;
uint_t xSize = L0;
uint_t ySize = L0;
double h = 1 / (double(L0) + 1);
double rhs = 1.0;
const uint_t xCells = uint_t(200);
const uint_t yCells = uint_t(100);
const real_t xSize = real_t(2);
const real_t ySize = real_t(1);
const real_t dx = xSize / real_c( xCells + uint_t(1) );
const real_t dy = ySize / real_c( yCells + uint_t(1) );
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
xCells, yCells, uint_t(1), // how many cells per block (x,y,z)
false, // one block per process - "false" means all blocks to one process
false, false, false ); // no periodicity
BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
BlockDataID fieldID = field::addToStorage<ScalarField_T>(blocks, "Field", real_t(0.0));
initU( blocks, fieldID );
BlockDataID fId = field::addToStorage< ScalarField_T >( blocks, "f", real_t(0.0));
initF( blocks, fId );
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
typedef field::communication::PackInfo<ScalarField> Packing;
typedef field::communication::PackInfo<ScalarField_T> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(2500);
const uint_t numberOfTimesteps = uint_t(10000);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::Poisson(fieldID, h, rhs), "Poisson Kernel" );
<< Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" );
timeloop.run();
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( fieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(int(L0/2), int(L0/2), 0), real_t(7.28886456002774547e-02));
}
......@@ -89,5 +126,5 @@ int main( int argc, char ** argv )
testPoisson();
return 0;
return EXIT_SUCCESS;
}
......@@ -6,9 +6,9 @@ from pystencils_walberla import CodeGeneration, generate_sweep
with CodeGeneration() as ctx:
# ----- Stencil 2D - created by specifying weights in nested list --------------------------
src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
stencil = [[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]
stencil = [[1.11, 2.22, 3.33],
[4.44, 5.55, 6.66],
[7.77, 8.88, 9.99]]
assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil))
generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])
......
......@@ -4,13 +4,15 @@ from pystencils_walberla import CodeGeneration, generate_sweep
with CodeGeneration() as ctx:
# ----- Solving the 2D Poisson equation with constant rhs --------------------------
rhs = sp.Symbol("rhs")
h = sp.Symbol("h")
src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
# ----- Solving the 2D Poisson equation with rhs --------------------------
dx = sp.Symbol("dx")
dy = sp.Symbol("dy")
src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx')
@ps.kernel
def kernel_func():
dst[0, 0] @= (src[1, 0] + src[-1, 0] +
src[0, 1] + src[0, -1]) / 4 + ((rhs * h**2) / 4)
dst[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) +
(dx**2 * (src[0, 1] + src[0, -1])) -
(rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
generate_sweep(ctx, 'Poisson', kernel_func, field_swaps=[(src, dst)])
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment