Skip to content
Snippets Groups Projects
Commit c43040be authored by Martin Bauer's avatar Martin Bauer
Browse files

Test and Bugfixes for the pystencils code generation interface

parent c3f6c2c2
Branches
Tags
No related merge requests found
...@@ -33,20 +33,20 @@ function( handle_python_codegen sourceFilesOut codeGenRequiredOut ) ...@@ -33,20 +33,20 @@ function( handle_python_codegen sourceFilesOut codeGenRequiredOut )
set(codeGenRequired NO) set(codeGenRequired NO)
foreach( sourceFile ${ARGN} ) foreach( sourceFile ${ARGN} )
if( ${sourceFile} MATCHES ".*\\.gen\\.py$" ) if( ${sourceFile} MATCHES ".*\\.gen\\.py$" )
get_filename_component(sourceFile ${sourceFile} NAME) get_filename_component(sourceFileName ${sourceFile} NAME)
if( ${sourceFile} MATCHES ".*\\.cuda\\.gen\\.py$" ) if( ${sourceFileName} MATCHES ".*\\.cuda\\.gen\\.py$" )
string(REPLACE ".cuda.gen.py" ".h" genHeaderFile ${sourceFile}) string(REPLACE ".cuda.gen.py" ".h" genHeaderFile ${sourceFileName})
string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFile}) string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFileName})
else() else()
string(REPLACE ".gen.py" ".h" genHeaderFile ${sourceFile}) string(REPLACE ".gen.py" ".h" genHeaderFile ${sourceFileName})
string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFile}) string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFileName})
endif() endif()
list(APPEND result ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile} list(APPEND result ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile}
${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile}) ${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile})
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile} add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile}
${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile} ${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} DEPENDS ${sourceFile}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} COMMAND ${PYTHON_EXECUTABLE} ${sourceFile}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR})
set(codeGenRequired YES) set(codeGenRequired YES)
......
...@@ -7,11 +7,11 @@ ...@@ -7,11 +7,11 @@
# Here is an explanation of the waLBerla module mechanism: # Here is an explanation of the waLBerla module mechanism:
# - One folder with a CMakeLists.txt that is a subfolder of one of the directories listed in the variable # - One folder with a CMakeLists.txt that is a subfolder of one of the directories listed in the variable
# WALBERLA_MODULE_DIRS can be a module # WALBERLA_MODULE_DIRS can be a module
# - the name of the module is the path relative to an WALBERLA_MODULE_DIRS entry # - the name of the module is the path relative to a WALBERLA_MODULE_DIRS entry
# - waLBerla modules are all placed in the src/ subdirectory, so WALBERLA_MODULE_DIRS contains ${waLBerla_SOURCE}/src/ # - waLBerla modules are all placed in the src/ subdirectory, so WALBERLA_MODULE_DIRS contains ${waLBerla_SOURCE}/src/
# - to create a module call waLBerla_module() inside this folder # - to create a module call waLBerla_module() inside this folder
# - this creates a static library that has the same name as the module, but slashes are replaced by minuses # - this creates a static library that has the same name as the module, but slashes are replaced by minuses.
# in case the module contains only header files no static lib is generated, only a custom target is added # In case the module contains only header files no static lib is generated, only a custom target is added
# to display the module in Visual Studio. # to display the module in Visual Studio.
# - waLBerla_module takes a list of dependent modules. A second list of dependencies is generated by parsing # - waLBerla_module takes a list of dependent modules. A second list of dependencies is generated by parsing
# all files in the module for corresponding "#include" lines. This mechanism is not a complete preprocessor # all files in the module for corresponding "#include" lines. This mechanism is not a complete preprocessor
......
...@@ -16,6 +16,11 @@ waLBerla_execute_test( NAME SimpleKernelTest ) ...@@ -16,6 +16,11 @@ waLBerla_execute_test( NAME SimpleKernelTest )
waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu ) waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu )
waLBerla_execute_test( NAME FieldIndexing3DTest ) waLBerla_execute_test( NAME FieldIndexing3DTest )
waLBerla_compile_test( FILES codegen/CodegenJacobiGPU.cpp
codegen/JacobiKernel2D.cuda.gen.py
codegen/JacobiKernel3D.cuda.gen.py
DEPENDS blockforest timeloop gui )
waLBerla_execute_test( NAME CodegenJacobiGPU )
# The following tests work only for CUDA enabled MPI # The following tests work only for CUDA enabled MPI
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file JacobiGpu.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "JacobiKernel2D.h"
#include "JacobiKernel3D.h"
#include "cuda/HostFieldAllocator.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/FieldCopy.h"
#include "cuda/GPUField.h"
#include "cuda/Kernel.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/FieldIndexing.h"
#include "field/AddToStorage.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "stencil/D3Q7.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
typedef cuda::GPUField<double> GPUField;
ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
{
return new ScalarField (
storage->getNumberOfXCells( *block ), // number of cells in x direction per block
storage->getNumberOfYCells( *block ), // number of cells in y direction per block
storage->getNumberOfZCells( *block ), // number of cells in z direction per block
1, // one ghost layer
double(0), // initial value
field::fzyx, // layout
make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory
);
}
void testJacobi2D()
{
uint_t xSize = 20;
uint_t ySize = 20;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( cpuFieldID );
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, 0 ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
typedef cuda::communication::GPUPackInfo<GPUField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
timeloop.run();
cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( cpuFieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
}
void testJacobi3D()
{
uint_t xSize = 12;
uint_t ySize = 12;
uint_t zSize = 12;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, zSize, // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( cpuFieldID );
for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2 ); ++z )
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, z ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
typedef cuda::communication::GPUPackInfo<GPUField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
timeloop.run();
cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( cpuFieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0));
}
int main( int argc, char ** argv )
{
mpi::Environment env( argc, argv );
debug::enterTestMode();
testJacobi2D();
testJacobi3D();
return 0;
}
from pystencils_walberla import Sweep
k = Sweep(dim=2)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2)
k.addEq(dst[0,0], rhs)
k.generate()
from pystencils_walberla import Sweep
k = Sweep(dim=3)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2)
k.addEq(dst[0,0,0], rhs)
k.generate()
...@@ -50,3 +50,11 @@ if( WALBERLA_BUILD_WITH_MPI ) ...@@ -50,3 +50,11 @@ if( WALBERLA_BUILD_WITH_MPI )
endif( WALBERLA_BUILD_WITH_MPI ) endif( WALBERLA_BUILD_WITH_MPI )
# CodeGen Tests
waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp codegen/JacobiKernel2D.gen.py codegen/JacobiKernel3D.gen.py
DEPENDS gui timeloop )
waLBerla_execute_test( NAME CodegenJacobiCPU )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file CodegenJacobiGPU.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "JacobiKernel2D.h"
#include "JacobiKernel3D.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "field/AddToStorage.h"
#include "field/communication/PackInfo.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "stencil/D3Q7.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
void testJacobi2D()
{
uint_t xSize = 20;
uint_t ySize = 20;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
// Initialize a quarter of the field with ones, the rest remains 0
// Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( fieldID );
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, 0 ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
typedef field::communication::PackInfo<ScalarField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel2D(fieldID, 1.0), "Jacobi Kernel" );
timeloop.run();
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( fieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
}
void testJacobi3D()
{
uint_t xSize = 12;
uint_t ySize = 12;
uint_t zSize = 12;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, zSize, // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
// Initialize a quarter of the field with ones, the rest remains 0
// Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( fieldID );
for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2); ++z )
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, z ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
typedef field::communication::PackInfo<ScalarField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800); // number of timesteps for non-gui runs
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel3D(fieldID, 1.0), "Jacobi Kernel" );
timeloop.run();
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( fieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0));
}
int main( int argc, char ** argv )
{
mpi::Environment env( argc, argv );
debug::enterTestMode();
testJacobi2D();
testJacobi3D();
return 0;
}
from pystencils_walberla import Sweep
k = Sweep(dim=2)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2)
k.addEq(dst[0,0], rhs)
k.generate()
from pystencils_walberla import Sweep
k = Sweep(dim=3)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2)
k.addEq(dst[0,0,0], rhs)
k.generate()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment