Commit c43040be authored by Martin Bauer's avatar Martin Bauer

Test and Bugfixes for the pystencils code generation interface

parent c3f6c2c2
......@@ -33,20 +33,20 @@ function( handle_python_codegen sourceFilesOut codeGenRequiredOut )
set(codeGenRequired NO)
foreach( sourceFile ${ARGN} )
if( ${sourceFile} MATCHES ".*\\.gen\\.py$" )
get_filename_component(sourceFile ${sourceFile} NAME)
if( ${sourceFile} MATCHES ".*\\.cuda\\.gen\\.py$" )
string(REPLACE ".cuda.gen.py" ".h" genHeaderFile ${sourceFile})
string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFile})
get_filename_component(sourceFileName ${sourceFile} NAME)
if( ${sourceFileName} MATCHES ".*\\.cuda\\.gen\\.py$" )
string(REPLACE ".cuda.gen.py" ".h" genHeaderFile ${sourceFileName})
string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFileName})
else()
string(REPLACE ".gen.py" ".h" genHeaderFile ${sourceFile})
string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFile})
string(REPLACE ".gen.py" ".h" genHeaderFile ${sourceFileName})
string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFileName})
endif()
list(APPEND result ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile}
${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile})
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile}
${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
DEPENDS ${sourceFile}
COMMAND ${PYTHON_EXECUTABLE} ${sourceFile}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
set(codeGenRequired YES)
......
......@@ -7,11 +7,11 @@
# Here is an explanation of the waLBerla module mechanism:
# - One folder with a CMakeLists.txt that is a subfolder of one of the directories listed in the variable
# WALBERLA_MODULE_DIRS can be a module
# - the name of the module is the path relative to an WALBERLA_MODULE_DIRS entry
# - the name of the module is the path relative to a WALBERLA_MODULE_DIRS entry
# - waLBerla modules are all placed in the src/ subdirectory, so WALBERLA_MODULE_DIRS contains ${waLBerla_SOURCE}/src/
# - to create a module call waLBerla_module() inside this folder
# - this creates a static library that has the same name as the module, but slashes are replaced by minuses
# in case the module contains only header files no static lib is generated, only a custom target is added
# - this creates a static library that has the same name as the module, but slashes are replaced by minuses.
# In case the module contains only header files no static lib is generated, only a custom target is added
# to display the module in Visual Studio.
# - waLBerla_module takes a list of dependent modules. A second list of dependencies is generated by parsing
# all files in the module for corresponding "#include" lines. This mechanism is not a complete preprocessor
......
......@@ -16,6 +16,11 @@ waLBerla_execute_test( NAME SimpleKernelTest )
waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu )
waLBerla_execute_test( NAME FieldIndexing3DTest )
waLBerla_compile_test( FILES codegen/CodegenJacobiGPU.cpp
codegen/JacobiKernel2D.cuda.gen.py
codegen/JacobiKernel3D.cuda.gen.py
DEPENDS blockforest timeloop gui )
waLBerla_execute_test( NAME CodegenJacobiGPU )
# The following tests work only for CUDA enabled MPI
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file JacobiGpu.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "JacobiKernel2D.h"
#include "JacobiKernel3D.h"
#include "cuda/HostFieldAllocator.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/FieldCopy.h"
#include "cuda/GPUField.h"
#include "cuda/Kernel.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/FieldIndexing.h"
#include "field/AddToStorage.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "stencil/D3Q7.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
typedef cuda::GPUField<double> GPUField;
ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
{
return new ScalarField (
storage->getNumberOfXCells( *block ), // number of cells in x direction per block
storage->getNumberOfYCells( *block ), // number of cells in y direction per block
storage->getNumberOfZCells( *block ), // number of cells in z direction per block
1, // one ghost layer
double(0), // initial value
field::fzyx, // layout
make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory
);
}
void testJacobi2D()
{
uint_t xSize = 20;
uint_t ySize = 20;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( cpuFieldID );
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, 0 ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
typedef cuda::communication::GPUPackInfo<GPUField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
timeloop.run();
cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( cpuFieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
}
void testJacobi3D()
{
uint_t xSize = 12;
uint_t ySize = 12;
uint_t zSize = 12;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, zSize, // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( cpuFieldID );
for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2 ); ++z )
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, z ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
typedef cuda::communication::GPUPackInfo<GPUField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" );
cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
timeloop.run();
cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( cpuFieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0));
}
int main( int argc, char ** argv )
{
mpi::Environment env( argc, argv );
debug::enterTestMode();
testJacobi2D();
testJacobi3D();
return 0;
}
from pystencils_walberla import Sweep
k = Sweep(dim=2)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2)
k.addEq(dst[0,0], rhs)
k.generate()
from pystencils_walberla import Sweep
k = Sweep(dim=3)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2)
k.addEq(dst[0,0,0], rhs)
k.generate()
......@@ -50,3 +50,11 @@ if( WALBERLA_BUILD_WITH_MPI )
endif( WALBERLA_BUILD_WITH_MPI )
# CodeGen Tests
waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp codegen/JacobiKernel2D.gen.py codegen/JacobiKernel3D.gen.py
DEPENDS gui timeloop )
waLBerla_execute_test( NAME CodegenJacobiCPU )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file CodegenJacobiGPU.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "JacobiKernel2D.h"
#include "JacobiKernel3D.h"
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/debug/TestSubsystem.h"
#include "field/AddToStorage.h"
#include "field/communication/PackInfo.h"
#include "gui/Gui.h"
#include "stencil/D2Q9.h"
#include "stencil/D3Q7.h"
#include "timeloop/SweepTimeloop.h"
using namespace walberla;
typedef GhostLayerField<double,1> ScalarField;
void testJacobi2D()
{
uint_t xSize = 20;
uint_t ySize = 20;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, uint_t(1), // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
// Initialize a quarter of the field with ones, the rest remains 0
// Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( fieldID );
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, 0 ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
typedef field::communication::PackInfo<ScalarField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800);
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel2D(fieldID, 1.0), "Jacobi Kernel" );
timeloop.run();
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( fieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
}
void testJacobi3D()
{
uint_t xSize = 12;
uint_t ySize = 12;
uint_t zSize = 12;
// Create blocks
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
uint_t(1) , uint_t(1), uint_t(1), // number of blocks in x,y,z direction
xSize, ySize, zSize, // how many cells per block (x,y,z)
real_t(1), // dx: length of one cell in physical coordinates
false, // one block per process - "false" means all blocks to one process
true, true, true ); // no periodicity
BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
// Initialize a quarter of the field with ones, the rest remains 0
// Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
auto f = blockIt->getData<ScalarField>( fieldID );
for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2); ++z )
for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
f->get( x, y, z ) = 1.0;
}
typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
typedef field::communication::PackInfo<ScalarField> Packing;
CommScheme commScheme(blocks);
commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
// Create Timeloop
const uint_t numberOfTimesteps = uint_t(800); // number of timesteps for non-gui runs
SweepTimeloop timeloop ( blocks, numberOfTimesteps );
// Registering the sweep
timeloop.add() << BeforeFunction( commScheme, "Communication" )
<< Sweep( pystencils::JacobiKernel3D(fieldID, 1.0), "Jacobi Kernel" );
timeloop.run();
auto firstBlock = blocks->begin();
auto f = firstBlock->getData<ScalarField>( fieldID );
WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0));
}
int main( int argc, char ** argv )
{
mpi::Environment env( argc, argv );
debug::enterTestMode();
testJacobi2D();
testJacobi3D();
return 0;
}
from pystencils_walberla import Sweep
k = Sweep(dim=2)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2)
k.addEq(dst[0,0], rhs)
k.generate()
from pystencils_walberla import Sweep
k = Sweep(dim=3)
src = k.field("f1")
dst = k.temporaryField(src)
h = k.constant("h")
rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2)
k.addEq(dst[0,0,0], rhs)
k.generate()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment