Test and Bugfixes for the pystencils code generation interface

c43040be · Martin Bauer · c3f6c2c2 · c43040be · c43040be · c43040be
Commit c43040be authored 7 years ago by Martin Bauer
--- a/cmake/waLBerlaHelperFunctions.cmake
+++ b/cmake/waLBerlaHelperFunctions.cmake
@@ -33,20 +33,20 @@ function( handle_python_codegen sourceFilesOut codeGenRequiredOut )
    set(codeGenRequired NO)
    foreach( sourceFile ${ARGN} )
        if( ${sourceFile} MATCHES ".*\\.gen\\.py$" )
-            get_filename_component(sourceFile ${sourceFile} NAME)
+            get_filename_component(sourceFileName ${sourceFile} NAME)
-            if( ${sourceFile} MATCHES ".*\\.cuda\\.gen\\.py$" )
+            if( ${sourceFileName} MATCHES ".*\\.cuda\\.gen\\.py$" )
-                string(REPLACE ".cuda.gen.py" ".h"  genHeaderFile ${sourceFile})
+                string(REPLACE ".cuda.gen.py" ".h"  genHeaderFile ${sourceFileName})
-                string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFile})
+                string(REPLACE ".cuda.gen.py" ".cu" genSourceFile ${sourceFileName})
            else()
-                string(REPLACE ".gen.py" ".h"  genHeaderFile ${sourceFile})
+                string(REPLACE ".gen.py" ".h"  genHeaderFile ${sourceFileName})
-                string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFile})
+                string(REPLACE ".gen.py" ".cpp" genSourceFile ${sourceFileName})
            endif()
            list(APPEND result ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile}
                               ${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile})
            add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${genSourceFile}
                                      ${CMAKE_CURRENT_BINARY_DIR}/${genHeaderFile}
-                               DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
+                               DEPENDS ${sourceFile}
-                               COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
+                               COMMAND ${PYTHON_EXECUTABLE} ${sourceFile}
                               WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
            include_directories(${CMAKE_CURRENT_BINARY_DIR})
            set(codeGenRequired YES)

--- a/cmake/waLBerlaModuleDependencySystem.cmake
+++ b/cmake/waLBerlaModuleDependencySystem.cmake
@@ -7,11 +7,11 @@
 # Here is an explanation of the waLBerla module mechanism:        
 #  - One folder with a CMakeLists.txt that is a subfolder of one of the directories listed in the variable
 #    WALBERLA_MODULE_DIRS can be a module
-#  - the name of the module is the path relative to an WALBERLA_MODULE_DIRS entry
+#  - the name of the module is the path relative to a WALBERLA_MODULE_DIRS entry
 #  - waLBerla modules are all placed in the src/ subdirectory, so WALBERLA_MODULE_DIRS contains ${waLBerla_SOURCE}/src/
 #  - to create a module call waLBerla_module() inside this folder
-#  - this creates a static library that has the same name as the module, but slashes are replaced by minuses
+#  - this creates a static library that has the same name as the module, but slashes are replaced by minuses.
-#    in case the module contains only header files no static lib is generated, only a custom target is added
+#    In case the module contains only header files no static lib is generated, only a custom target is added
 #    to display the module in Visual Studio.
 #  - waLBerla_module takes a list of dependent modules. A second list of dependencies is generated by parsing
 #    all files in the module for corresponding "#include" lines. This mechanism is not a complete preprocessor

--- a/tests/cuda/CMakeLists.txt
+++ b/tests/cuda/CMakeLists.txt
@@ -16,6 +16,11 @@ waLBerla_execute_test( NAME  SimpleKernelTest )
 waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu )
 waLBerla_execute_test( NAME  FieldIndexing3DTest )
+waLBerla_compile_test( FILES codegen/CodegenJacobiGPU.cpp
+                             codegen/JacobiKernel2D.cuda.gen.py
+                             codegen/JacobiKernel3D.cuda.gen.py
+                       DEPENDS blockforest timeloop gui )
+waLBerla_execute_test( NAME CodegenJacobiGPU )
 # The following tests work only for CUDA enabled MPI

--- a/tests/cuda/codegen/CodegenJacobiGPU.cpp
+++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file JacobiGpu.cpp
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+#include "JacobiKernel2D.h"
+#include "JacobiKernel3D.h"
+#include "cuda/HostFieldAllocator.h"
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformDirectScheme.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "cuda/HostFieldAllocator.h"
+#include "cuda/FieldCopy.h"
+#include "cuda/GPUField.h"
+#include "cuda/Kernel.h"
+#include "cuda/AddGPUFieldToStorage.h"
+#include "cuda/communication/GPUPackInfo.h"
+#include "cuda/FieldIndexing.h"
+#include "field/AddToStorage.h"
+#include "field/communication/UniformMPIDatatypeInfo.h"
+#include "field/vtk/VTKWriter.h"
+#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
+#include "gui/Gui.h"
+#include "stencil/D2Q9.h"
+#include "stencil/D3Q7.h"
+#include "timeloop/SweepTimeloop.h"
+using namespace walberla;
+typedef GhostLayerField<double,1> ScalarField;
+typedef cuda::GPUField<double> GPUField;
+ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
+{
+   return new ScalarField (
+            storage->getNumberOfXCells( *block ),   // number of cells in x direction per block
+            storage->getNumberOfYCells( *block ),   // number of cells in y direction per block
+            storage->getNumberOfZCells( *block ),   // number of cells in z direction per block
+            1,                                      // one ghost layer
+            double(0),                              // initial value
+            field::fzyx,                            // layout
+            make_shared<cuda::HostFieldAllocator<double> >()  // allocator for host pinned memory
+            );
+}
+void testJacobi2D()
+{
+   uint_t xSize = 20;
+   uint_t ySize = 20;
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+           xSize, ySize, uint_t(1),  // how many cells per block (x,y,z)
+           real_t(1),                          // dx: length of one cell in physical coordinates
+           false,                              // one block per process - "false" means all blocks to one process
+           true, true, true );                 // no periodicity
+   BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
+   BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
+   for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      auto f = blockIt->getData<ScalarField>( cpuFieldID );
+      for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
+         for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
+            f->get( x, y, 0 ) = 1.0;
+   }
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
+   typedef cuda::communication::GPUPackInfo<GPUField> Packing;
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(800);
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::JacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" );
+   cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
+   timeloop.run();
+   cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
+   auto firstBlock = blocks->begin();
+   auto f = firstBlock->getData<ScalarField>( cpuFieldID );
+   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
+}
+void testJacobi3D()
+{
+   uint_t xSize = 12;
+   uint_t ySize = 12;
+   uint_t zSize = 12;
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+           xSize, ySize, zSize,                // how many cells per block (x,y,z)
+           real_t(1),                          // dx: length of one cell in physical coordinates
+           false,                              // one block per process - "false" means all blocks to one process
+           true, true, true );                 // no periodicity
+   BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
+   BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
+   for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      auto f = blockIt->getData<ScalarField>( cpuFieldID );
+      for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2 ); ++z )
+         for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
+            for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
+               f->get( x, y, z ) = 1.0;
+   }
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
+   typedef cuda::communication::GPUPackInfo<GPUField> Packing;
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(800);
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::JacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" );
+   cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
+   timeloop.run();
+   cuda::fieldCpy<ScalarField, GPUField>( blocks, cpuFieldID, gpuField );
+   auto firstBlock = blocks->begin();
+   auto f = firstBlock->getData<ScalarField>( cpuFieldID );
+   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0));
+}
+int main( int argc, char ** argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+   testJacobi2D();
+   testJacobi3D();
+   return 0;
+}
--- a/tests/cuda/codegen/JacobiKernel2D.cuda.gen.py
+++ b/tests/cuda/codegen/JacobiKernel2D.cuda.gen.py
+from pystencils_walberla import Sweep
+k = Sweep(dim=2)
+src = k.field("f1")
+dst = k.temporaryField(src)
+h = k.constant("h")
+rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2)
+k.addEq(dst[0,0], rhs)
+k.generate()
--- a/tests/cuda/codegen/JacobiKernel3D.cuda.gen.py
+++ b/tests/cuda/codegen/JacobiKernel3D.cuda.gen.py
+from pystencils_walberla import Sweep
+k = Sweep(dim=3)
+src = k.field("f1")
+dst = k.temporaryField(src)
+h = k.constant("h")
+rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2)
+k.addEq(dst[0,0,0], rhs)
+k.generate()
--- a/tests/field/CMakeLists.txt
+++ b/tests/field/CMakeLists.txt
@@ -50,3 +50,11 @@ if( WALBERLA_BUILD_WITH_MPI )
 endif( WALBERLA_BUILD_WITH_MPI )
+# CodeGen Tests
+waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp codegen/JacobiKernel2D.gen.py codegen/JacobiKernel3D.gen.py
+                       DEPENDS gui timeloop )
+waLBerla_execute_test( NAME CodegenJacobiCPU )
--- a/tests/field/codegen/CodegenJacobiCPU.cpp
+++ b/tests/field/codegen/CodegenJacobiCPU.cpp
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CodegenJacobiGPU.cpp
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+#include "JacobiKernel2D.h"
+#include "JacobiKernel3D.h"
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformDirectScheme.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "field/AddToStorage.h"
+#include "field/communication/PackInfo.h"
+#include "gui/Gui.h"
+#include "stencil/D2Q9.h"
+#include "stencil/D3Q7.h"
+#include "timeloop/SweepTimeloop.h"
+using namespace walberla;
+typedef GhostLayerField<double,1> ScalarField;
+void testJacobi2D()
+{
+   uint_t xSize = 20;
+   uint_t ySize = 20;
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+           xSize, ySize, uint_t(1),            // how many cells per block (x,y,z)
+           real_t(1),                          // dx: length of one cell in physical coordinates
+           false,                              // one block per process - "false" means all blocks to one process
+           true, true, true );                 // no periodicity
+   BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
+   // Initialize a quarter of the field with ones, the rest remains 0
+   // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
+   for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      auto f = blockIt->getData<ScalarField>( fieldID );
+      for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
+         for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
+            f->get( x, y, 0 ) = 1.0;
+   }
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
+   typedef field::communication::PackInfo<ScalarField> Packing;
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(800);
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::JacobiKernel2D(fieldID, 1.0), "Jacobi Kernel" );
+   timeloop.run();
+   auto firstBlock = blocks->begin();
+   auto f = firstBlock->getData<ScalarField>( fieldID );
+   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
+}
+void testJacobi3D()
+{
+   uint_t xSize = 12;
+   uint_t ySize = 12;
+   uint_t zSize = 12;
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+           xSize, ySize, zSize,                // how many cells per block (x,y,z)
+           real_t(1),                          // dx: length of one cell in physical coordinates
+           false,                              // one block per process - "false" means all blocks to one process
+           true, true, true );                 // no periodicity
+   BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
+   // Initialize a quarter of the field with ones, the rest remains 0
+   // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
+   for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      auto f = blockIt->getData<ScalarField>( fieldID );
+      for( cell_idx_t z = 0; z < cell_idx_c( f->zSize() / 2); ++z )
+         for( cell_idx_t y = 0; y < cell_idx_c( f->ySize() / 2 ); ++y )
+            for( cell_idx_t x = 0; x < cell_idx_c( f->xSize() / 2 ); ++x )
+               f->get( x, y, z ) = 1.0;
+   }
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
+   typedef field::communication::PackInfo<ScalarField> Packing;
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(800); // number of timesteps for non-gui runs
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::JacobiKernel3D(fieldID, 1.0), "Jacobi Kernel" );
+   timeloop.run();
+   auto firstBlock = blocks->begin();
+   auto f = firstBlock->getData<ScalarField>( fieldID );
+   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 8.0));
+}
+int main( int argc, char ** argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+   testJacobi2D();
+   testJacobi3D();
+   return 0;
+}
--- a/tests/field/codegen/JacobiKernel2D.gen.py
+++ b/tests/field/codegen/JacobiKernel2D.gen.py
+from pystencils_walberla import Sweep
+k = Sweep(dim=2)
+src = k.field("f1")
+dst = k.temporaryField(src)
+h = k.constant("h")
+rhs = (src[1,0] + src[-1,0] + src[0,1] + src[0, -1] ) / (4 * h**2)
+k.addEq(dst[0,0], rhs)
+k.generate()
--- a/tests/field/codegen/JacobiKernel3D.gen.py
+++ b/tests/field/codegen/JacobiKernel3D.gen.py
+from pystencils_walberla import Sweep
+k = Sweep(dim=3)
+src = k.field("f1")
+dst = k.temporaryField(src)
+h = k.constant("h")
+rhs = (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * h**2)
+k.addEq(dst[0,0,0], rhs)
+k.generate()