Commit ef946fcf authored by Christoph Rettinger's avatar Christoph Rettinger
Browse files

Merge branch 'documentation' into 'master'

Documentation maintenance

See merge request walberla/walberla!479
parents 3b00f584 25af88f4
Pipeline #34263 failed with stages
in 26 minutes and 29 seconds
...@@ -1452,7 +1452,7 @@ int main( int argc, char **argv ) ...@@ -1452,7 +1452,7 @@ int main( int argc, char **argv )
WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...") WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...")
// check refinement criterions and refine/coarsen if necessary // check refinement criteria and refine/coarsen if necessary
uint_t stampBefore = blocks->getBlockForest().getModificationStamp(); uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
blocks->refresh(); blocks->refresh();
uint_t stampAfter = blocks->getBlockForest().getModificationStamp(); uint_t stampAfter = blocks->getBlockForest().getModificationStamp();
...@@ -2090,7 +2090,7 @@ int main( int argc, char **argv ) ...@@ -2090,7 +2090,7 @@ int main( int argc, char **argv )
WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...") WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...")
// check refinement criterions and refine/coarsen if necessary // check refinement criteria and refine/coarsen if necessary
uint_t stampBefore = blocks->getBlockForest().getModificationStamp(); uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
blocks->refresh(); blocks->refresh();
uint_t stampAfter = blocks->getBlockForest().getModificationStamp(); uint_t stampAfter = blocks->getBlockForest().getModificationStamp();
......
...@@ -929,7 +929,7 @@ int main( int argc, char **argv ) ...@@ -929,7 +929,7 @@ int main( int argc, char **argv )
if( !useStaticRefinement && refinementCheckFrequency == 0 && numberOfLevels != 1 ) if( !useStaticRefinement && refinementCheckFrequency == 0 && numberOfLevels != 1 )
{ {
// determine check frequency automatically based on maximum admissable velocity and block sizes // determine check frequency automatically based on maximum admissible velocity and block sizes
real_t uMax = real_t(0.1); real_t uMax = real_t(0.1);
real_t refinementCheckFrequencyFinestLevel = ( overlap + real_c(blockSize) - real_t(2) * real_t(FieldGhostLayers) * dx) / uMax; real_t refinementCheckFrequencyFinestLevel = ( overlap + real_c(blockSize) - real_t(2) * real_t(FieldGhostLayers) * dx) / uMax;
refinementCheckFrequency = uint_c( refinementCheckFrequencyFinestLevel / real_t(lbmTimeStepsPerTimeLoopIteration)); refinementCheckFrequency = uint_c( refinementCheckFrequencyFinestLevel / real_t(lbmTimeStepsPerTimeLoopIteration));
...@@ -1252,7 +1252,7 @@ int main( int argc, char **argv ) ...@@ -1252,7 +1252,7 @@ int main( int argc, char **argv )
(*velocityCommunicationScheme)(); (*velocityCommunicationScheme)();
} }
// check refinement criterions and refine/coarsen if necessary // check refinement criteria and refine/coarsen if necessary
uint_t stampBefore = blocks->getBlockForest().getModificationStamp(); uint_t stampBefore = blocks->getBlockForest().getModificationStamp();
blocks->refresh(); blocks->refresh();
uint_t stampAfter = blocks->getBlockForest().getModificationStamp(); uint_t stampAfter = blocks->getBlockForest().getModificationStamp();
......
...@@ -6,7 +6,7 @@ import os ...@@ -6,7 +6,7 @@ import os
class Parameter: class Parameter:
def __init__(self, name, type, defValue="", comment=""): def __init__(self, name, type, defValue="", comment=""):
"""Propery of a data strcuture """Property of a data structure
Parameters Parameters
---------- ----------
......
...@@ -878,7 +878,7 @@ int main( int argc, char **argv ) ...@@ -878,7 +878,7 @@ int main( int argc, char **argv )
real_t defaultOmegaBulk = lbm_mesapd_coupling::omegaBulkFromOmega(omega, real_t(1)); real_t defaultOmegaBulk = lbm_mesapd_coupling::omegaBulkFromOmega(omega, real_t(1));
shared_ptr<OmegaBulkAdapter_T> omegaBulkAdapter = make_shared<OmegaBulkAdapter_T>(blocks, omegaBulkFieldID, accessor, defaultOmegaBulk, omegaBulk, adaptionLayerSize, sphereSelector); shared_ptr<OmegaBulkAdapter_T> omegaBulkAdapter = make_shared<OmegaBulkAdapter_T>(blocks, omegaBulkFieldID, accessor, defaultOmegaBulk, omegaBulk, adaptionLayerSize, sphereSelector);
timeloopAfterParticles.add() << Sweep( makeSharedSweep(omegaBulkAdapter), "Omega Bulk Adapter"); timeloopAfterParticles.add() << Sweep( makeSharedSweep(omegaBulkAdapter), "Omega Bulk Adapter");
// initally adapt // initially adapt
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
(*omegaBulkAdapter)(blockIt.get()); (*omegaBulkAdapter)(blockIt.get());
} }
......
...@@ -843,7 +843,7 @@ int main( int argc, char **argv ) ...@@ -843,7 +843,7 @@ int main( int argc, char **argv )
auto sphereShape = ss->create<mesa_pd::data::Sphere>( diameter * real_t(0.5) ); auto sphereShape = ss->create<mesa_pd::data::Sphere>( diameter * real_t(0.5) );
ss->shapes[sphereShape]->updateMassAndInertia(densityRatio); ss->shapes[sphereShape]->updateMassAndInertia(densityRatio);
std::mt19937 randomGenerator (static_cast<unsigned int>(2610)); // fixed seed: quasi-random and reproducable std::mt19937 randomGenerator (static_cast<unsigned int>(2610)); // fixed seed: quasi-random and reproducible
for( uint_t nSed = 0; nSed < numberOfSediments; ++nSed ) for( uint_t nSed = 0; nSed < numberOfSediments; ++nSed )
{ {
...@@ -962,7 +962,7 @@ int main( int argc, char **argv ) ...@@ -962,7 +962,7 @@ int main( int argc, char **argv )
if(currentPhase == 1) if(currentPhase == 1)
{ {
// damp velocites to avoid too large ones // damp velocities to avoid too large ones
ps->forEachParticle( useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor, ps->forEachParticle( useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor,
[](const size_t idx, ParticleAccessor_T& ac){ [](const size_t idx, ParticleAccessor_T& ac){
ac.setLinearVelocity(idx, ac.getLinearVelocity(idx) * real_t(0.5)); ac.setLinearVelocity(idx, ac.getLinearVelocity(idx) * real_t(0.5));
......
...@@ -573,7 +573,7 @@ int main( int argc, char **argv ) ...@@ -573,7 +573,7 @@ int main( int argc, char **argv )
if(maxPenetrationDepth < overlapLimit) break; if(maxPenetrationDepth < overlapLimit) break;
// reset velocites to avoid too large ones // reset velocities to avoid too large ones
ps->forEachParticle( useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor, ps->forEachParticle( useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor,
[](const size_t idx, ParticleAccessor_T& ac){ [](const size_t idx, ParticleAccessor_T& ac){
......
...@@ -6,7 +6,7 @@ import os ...@@ -6,7 +6,7 @@ import os
class Parameter: class Parameter:
def __init__(self, name, type, defValue=""): def __init__(self, name, type, defValue=""):
"""Propery of a data strcuture """Property of a data structure
Parameters Parameters
---------- ----------
......
...@@ -1064,7 +1064,7 @@ void keepInflowOutflowAtTheSameLevel( std::vector< std::pair< const Block *, uin ...@@ -1064,7 +1064,7 @@ void keepInflowOutflowAtTheSameLevel( std::vector< std::pair< const Block *, uin
uint_t maxInflowLevel( uint_t(0) ); uint_t maxInflowLevel( uint_t(0) );
uint_t maxOutflowLevel( uint_t(0) ); uint_t maxOutflowLevel( uint_t(0) );
// In addtion to keeping in- and outflow blocks at the same level, this callback also // In addition to keeping in- and outflow blocks at the same level, this callback also
// prevents these blocks from coarsening. // prevents these blocks from coarsening.
for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it ) for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
...@@ -1472,7 +1472,7 @@ void Evaluation< LatticeModel_T >::operator()() ...@@ -1472,7 +1472,7 @@ void Evaluation< LatticeModel_T >::operator()()
{ {
WALBERLA_LOG_RESULT_ON_ROOT( "force acting on cylinder (in dimensionless lattice units of the coarsest grid - evaluated in time step " WALBERLA_LOG_RESULT_ON_ROOT( "force acting on cylinder (in dimensionless lattice units of the coarsest grid - evaluated in time step "
<< forceEvaluationExecutionCount_ << "):\n " << force_ << oss.str() << << forceEvaluationExecutionCount_ << "):\n " << force_ << oss.str() <<
"\ndrag and lift coefficients (including extremas of last " << ( coefficients_[0].size() * checkFrequency_ ) << " time steps):" "\ndrag and lift coefficients (including extrema of last " << ( coefficients_[0].size() * checkFrequency_ ) << " time steps):"
"\n \"real\" area:" "\n \"real\" area:"
"\n c_D: " << cDRealArea << " (min = " << coefficientExtremas_[0].first << ", max = " << coefficientExtremas_[0].second << ")" << "\n c_D: " << cDRealArea << " (min = " << coefficientExtremas_[0].first << ", max = " << coefficientExtremas_[0].second << ")" <<
"\n c_L: " << cLRealArea << " (min = " << coefficientExtremas_[1].first << ", max = " << coefficientExtremas_[1].second << ")" << "\n c_L: " << cLRealArea << " (min = " << coefficientExtremas_[1].first << ", max = " << coefficientExtremas_[1].second << ")" <<
...@@ -2569,14 +2569,14 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod ...@@ -2569,14 +2569,14 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod
blockforest::DynamicDiffusionBalance< blockforest::NoPhantomData >( maxIterations, flowIterations ) ); blockforest::DynamicDiffusionBalance< blockforest::NoPhantomData >( maxIterations, flowIterations ) );
} }
// add callback functions which are executed after all block data was unpakced after the dynamic load balancing // add callback functions which are executed after all block data was unpacked after the dynamic load balancing
// for blocks that have *not* migrated: store current flag field state (required for lbm::PostProcessing) // for blocks that have *not* migrated: store current flag field state (required for lbm::PostProcessing)
blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( lbm::MarkerFieldGenerator< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> >( blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( lbm::MarkerFieldGenerator< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> >(
pdfFieldId, markerDataId, flagFieldFilter ) ); pdfFieldId, markerDataId, flagFieldFilter ) );
// (re)set boundaries = (re)initialize flag field for every block with respect to the new block structure (the size of neighbor blocks might have changed) // (re)set boundaries = (re)initialize flag field for every block with respect to the new block structure (the size of neighbor blocks might have changed)
blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( blockforest::BlockForest::RefreshCallbackWrappper( boundarySetter ) ); blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( blockforest::BlockForest::RefreshCallbackWrappper( boundarySetter ) );
// treat boundary-fluid cell convertions // treat boundary-fluid cell conversions
blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( lbm::PostProcessing< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> >( blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( lbm::PostProcessing< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> >(
pdfFieldId, markerDataId, flagFieldFilter ) ); pdfFieldId, markerDataId, flagFieldFilter ) );
// (re)set velocity field (velocity field data is not migrated!) // (re)set velocity field (velocity field data is not migrated!)
...@@ -2920,10 +2920,10 @@ int main( int argc, char **argv ) ...@@ -2920,10 +2920,10 @@ int main( int argc, char **argv )
"// //\n" "// //\n"
"// Schaefer Turek Benchmark //\n" "// Schaefer Turek Benchmark //\n"
"// //\n" "// //\n"
"// Reference: Schaefer, M. and Turek, S. (1996) 'Benchmark computations of laminar flow around a cylinder (with support //\n" "// Reference: Schaefer, M. and Turek, S. (1996) Benchmark computations of laminar flow around a cylinder (with support //\n"
"// by F. Durst, E. Krause and R. Rannacher), in E. Hirschel (Ed.): Flow Simulation with High-Performance //\n" "// by F. Durst, E. Krause and R. Rannacher), in E. Hirschel (Ed.): Flow Simulation with High-Performance //\n"
"// Computers II. DFG Priority Research Program Results 1993-1995, No. 52 in Notes Numer, Fluid Mech., //\n" "// Computers II. DFG Priority Research Program Results 1993-1995, No. 48 in Notes on Numerical Fluid //\n"
"// pp.547-566, Vieweg, Weisbaden. //\n" "// Mechanics, pp.547-566, Vieweg, Weisbaden. //\n"
"// //\n" "// //\n"
"//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////" ); "//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////" );
......
...@@ -295,7 +295,7 @@ class DummySweep ...@@ -295,7 +295,7 @@ class DummySweep
void emptyFunction() {} void emptyFunction() {}
//******************************************************************************************************************* //*******************************************************************************************************************
/*!\brief Simualtion of a strongly heterogeneous sized particulate flow system using combined resolved and unresolved /*!\brief Simulation of a strongly heterogeneous sized particulate flow system using combined resolved and unresolved
* methods. * methods.
* *
* For the coupling of resolved particles the Momentum Exchange Method (MEM) is used, whereas for the * For the coupling of resolved particles the Momentum Exchange Method (MEM) is used, whereas for the
......
...@@ -599,7 +599,7 @@ int main(int argc, char** argv) { ...@@ -599,7 +599,7 @@ int main(int argc, char** argv) {
WALBERLA_CHECK(!(useCurlCriterion && useVorticityCriterion), WALBERLA_CHECK(!(useCurlCriterion && useVorticityCriterion),
"Using curl and vorticity criterion together makes no sense."); "Using curl and vorticity criterion together makes no sense.");
// create base dir if it doesnt already exist // create base dir if it doesn't already exist
filesystem::path bpath(baseFolder); filesystem::path bpath(baseFolder);
if (!filesystem::exists(bpath)) { if (!filesystem::exists(bpath)) {
filesystem::create_directory(bpath); filesystem::create_directory(bpath);
......
...@@ -236,7 +236,7 @@ There is a third sweep concept called "Sweep on Block" where the members of the ...@@ -236,7 +236,7 @@ There is a third sweep concept called "Sweep on Block" where the members of the
are in fact block local data.\n are in fact block local data.\n
This is described in more detail here: \ref sweepTimeloop_onBlock This is described in more detail here: \ref sweepTimeloop_onBlock
The next tutorial covers writing a real algorithm, which also requires communication among blocks: \ref tutorial03 The next tutorial covers writing a real algorithm, which also requires communication among blocks: \ref tutorial_basics_03
\tableofcontents \tableofcontents
......
...@@ -22,7 +22,7 @@ For interactive devolvement, the next section can be written in a <a target="_bl ...@@ -22,7 +22,7 @@ For interactive devolvement, the next section can be written in a <a target="_bl
First, we introduce the variables contained in the PDE and its discretization as symbols. For the two-grid algorithm, we require one source field `u` and one destination field `u_tmp`. Both are set as generic 2-dimensional fields. We explicitly set their memory layout to `fzyx`. Both waLBerla and pystencils support two kinds of memory layouts. The short `fzyx` lists the four domain dimensions (three spatial, one for values per cell) in the order of arrangement in memory. `fzyx` describes a Struct of Arrays (SOA) layout where the domain is split along `f` and then linearized. When iterating, the outermost loop runs over `f`, and the innermost loop runs over `x`. The alternative is an %Array of Structs layout (AOS) which is designated `zyxf`, iterating over `f` in the innermost loop. In our case, where we only have one value per cell, it does not matter which layout is selected. In contrast, for simulating an Advection-Diffusion-Process with multiple, independent particle distributions, `fzyx` performs better in most cases as it improves data locality and enables vectorization (SIMD, SIMT). For more information on SOA and AOS, consider <a target="_blank" href="https://software.intel.com/content/www/us/en/develop/articles/memory-layout-transformations.html">this</a> article. First, we introduce the variables contained in the PDE and its discretization as symbols. For the two-grid algorithm, we require one source field `u` and one destination field `u_tmp`. Both are set as generic 2-dimensional fields. We explicitly set their memory layout to `fzyx`. Both waLBerla and pystencils support two kinds of memory layouts. The short `fzyx` lists the four domain dimensions (three spatial, one for values per cell) in the order of arrangement in memory. `fzyx` describes a Struct of Arrays (SOA) layout where the domain is split along `f` and then linearized. When iterating, the outermost loop runs over `f`, and the innermost loop runs over `x`. 
The alternative is an %Array of Structs layout (AOS) which is designated `zyxf`, iterating over `f` in the innermost loop. In our case, where we only have one value per cell, it does not matter which layout is selected. In contrast, for simulating an Advection-Diffusion-Process with multiple, independent particle distributions, `fzyx` performs better in most cases as it improves data locality and enables vectorization (SIMD, SIMT). For more information on SOA and AOS, consider <a target="_blank" href="https://software.intel.com/content/www/us/en/develop/articles/memory-layout-transformations.html">this</a> article.
\code \code{.py}
u, u_tmp = ps.fields("u, u_tmp: [2D]", layout='fzyx') u, u_tmp = ps.fields("u, u_tmp: [2D]", layout='fzyx')
kappa = sp.Symbol("kappa") kappa = sp.Symbol("kappa")
dx = sp.Symbol("dx") dx = sp.Symbol("dx")
...@@ -31,7 +31,7 @@ dt = sp.Symbol("dt") ...@@ -31,7 +31,7 @@ dt = sp.Symbol("dt")
With the pystencils buildings blocks, we can directly define the time and spatial derivative of the PDE. With the pystencils buildings blocks, we can directly define the time and spatial derivative of the PDE.
\code \code{.py}
heat_pde = ps.fd.transient(u) - kappa * ( ps.fd.diff( u, 0, 0 ) + ps.fd.diff( u, 1, 1 ) ) heat_pde = ps.fd.transient(u) - kappa * ( ps.fd.diff( u, 0, 0 ) + ps.fd.diff( u, 1, 1 ) )
\endcode \endcode
...@@ -42,7 +42,7 @@ Printing `heat_pde` inside a Jupyter notebook shows the equation as: ...@@ -42,7 +42,7 @@ Printing `heat_pde` inside a Jupyter notebook shows the equation as:
Next, the PDE will be discretized. We use the `Discretization2ndOrder` class to apply finite differences discretization to the spatial components, and explicit Euler discretization for the time step. Next, the PDE will be discretized. We use the `Discretization2ndOrder` class to apply finite differences discretization to the spatial components, and explicit Euler discretization for the time step.
\code \code{.py}
discretize = ps.fd.Discretization2ndOrder(dx=dx, dt=dt) discretize = ps.fd.Discretization2ndOrder(dx=dx, dt=dt)
heat_pde_discretized = discretize(heat_pde) heat_pde_discretized = discretize(heat_pde)
\endcode \endcode
...@@ -55,7 +55,7 @@ Printing `heat_pde_discretized` reveals ...@@ -55,7 +55,7 @@ Printing `heat_pde_discretized` reveals
This equation can be simplified by combining the two fractions on the right-hand side. Furthermore, we would like to pre-calculate the division outside the loop of the compute kernel. To achieve this, we will first apply the simplification functionality of sympy, and then replace the division by introducing a subexpression. This equation can be simplified by combining the two fractions on the right-hand side. Furthermore, we would like to pre-calculate the division outside the loop of the compute kernel. To achieve this, we will first apply the simplification functionality of sympy, and then replace the division by introducing a subexpression.
\code \code{.py}
heat_pde_discretized = heat_pde_discretized.args[1] + heat_pde_discretized.args[0].simplify() heat_pde_discretized = heat_pde_discretized.args[1] + heat_pde_discretized.args[0].simplify()
@ps.kernel @ps.kernel
...@@ -85,7 +85,7 @@ We will now use the waLBerla build system to generate a sweep from this symbolic ...@@ -85,7 +85,7 @@ We will now use the waLBerla build system to generate a sweep from this symbolic
We create a python file called *HeatEquationKernel.py* in our application folder. This file contains the python code we have developed above. Additionally, to `sympy` and `pystencils`, we add the import directive `from pystencils_walberla import CodeGeneration, generate_sweep`. At the end of the file, we add these two lines: We create a python file called *HeatEquationKernel.py* in our application folder. This file contains the python code we have developed above. Additionally, to `sympy` and `pystencils`, we add the import directive `from pystencils_walberla import CodeGeneration, generate_sweep`. At the end of the file, we add these two lines:
\code \code{.py}
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
generate_sweep(ctx, 'HeatEquationKernel', ac) generate_sweep(ctx, 'HeatEquationKernel', ac)
\endcode \endcode
...@@ -94,7 +94,7 @@ The `CodeGeneration` context and the function `generate_sweep` are provided by w ...@@ -94,7 +94,7 @@ The `CodeGeneration` context and the function `generate_sweep` are provided by w
The code generation script will later be called by the build system while compiling the application. The complete script looks like this: The code generation script will later be called by the build system while compiling the application. The complete script looks like this:
\code \code{.py}
import sympy as sp import sympy as sp
import pystencils as ps import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep from pystencils_walberla import CodeGeneration, generate_sweep
...@@ -124,7 +124,7 @@ with CodeGeneration() as ctx: ...@@ -124,7 +124,7 @@ with CodeGeneration() as ctx:
\endcode \endcode
As a next step, we register the script with the CMake build system. Outside of our application folder, open *CMakeLists.txt* and add these lines (replace `codegen` by the name of your folder): As a next step, we register the script with the CMake build system. Outside of our application folder, open *CMakeLists.txt* and add these lines (replace `codegen` by the name of your folder):
\code \code{.unparsed}
if( WALBERLA_BUILD_WITH_CODEGEN ) if( WALBERLA_BUILD_WITH_CODEGEN )
add_subdirectory(codegen) add_subdirectory(codegen)
endif() endif()
...@@ -132,7 +132,7 @@ endif() ...@@ -132,7 +132,7 @@ endif()
The `if` block makes sure our application is only built if the CMake flag `WALBERLA_BUILD_WITH_CODEGEN` is set. In the application folder, create another *CMakeLists.txt* file. For registering a code generation target, the build system provides the `walberla_generate_target_from_python` macro. Apart from the target name, we need to pass it the name of our python script and the names of the generated C++ header and source files. Their names need to match the class name passed to `generate_sweep` in the script. Add the following lines to your *CMakeLists.txt*. The `if` block makes sure our application is only built if the CMake flag `WALBERLA_BUILD_WITH_CODEGEN` is set. In the application folder, create another *CMakeLists.txt* file. For registering a code generation target, the build system provides the `walberla_generate_target_from_python` macro. Apart from the target name, we need to pass it the name of our python script and the names of the generated C++ header and source files. Their names need to match the class name passed to `generate_sweep` in the script. Add the following lines to your *CMakeLists.txt*.
\code \code{.unparsed}
if( WALBERLA_BUILD_WITH_CODEGEN ) if( WALBERLA_BUILD_WITH_CODEGEN )
walberla_generate_target_from_python( NAME CodegenHeatEquationKernel walberla_generate_target_from_python( NAME CodegenHeatEquationKernel
FILE HeatEquationKernel.py FILE HeatEquationKernel.py
...@@ -148,7 +148,7 @@ When running `make` again at a later time, the code will only be regenerated if ...@@ -148,7 +148,7 @@ When running `make` again at a later time, the code will only be regenerated if
Finally, we can use the generated sweep in an actual waLBerla application. In the application folder, create the source file *01_CodegenHeatEquation.cpp*. Open *CMakeLists.txt* and register the source file as an executable using the macro `walberla_add_executable`. Add all required waLBerla modules as dependencies, as well as the generated target. Finally, we can use the generated sweep in an actual waLBerla application. In the application folder, create the source file *01_CodegenHeatEquation.cpp*. Open *CMakeLists.txt* and register the source file as an executable using the macro `walberla_add_executable`. Add all required waLBerla modules as dependencies, as well as the generated target.
\code \code{.unparsed}
walberla_add_executable ( NAME 01_CodegenHeatEquation walberla_add_executable ( NAME 01_CodegenHeatEquation
FILES 01_CodegenHeatEquation.cpp FILES 01_CodegenHeatEquation.cpp
DEPENDS blockforest core field stencil timeloop vtk pde CodegenHeatEquationKernel ) DEPENDS blockforest core field stencil timeloop vtk pde CodegenHeatEquationKernel )
......
...@@ -22,7 +22,7 @@ In the code generation python script, we first require a few imports from lbmpy ...@@ -22,7 +22,7 @@ In the code generation python script, we first require a few imports from lbmpy
From the `lbmpy.creationfunctions` we require the functions to create collision and update rules. For the actual code generation, `generate_lattice_model` from `lbmpy_walberla` is required. Since we will define symbols, `SymPy` is also needed. From the `lbmpy.creationfunctions` we require the functions to create collision and update rules. For the actual code generation, `generate_lattice_model` from `lbmpy_walberla` is required. Since we will define symbols, `SymPy` is also needed.
\code \code{.py}
import sympy as sp import sympy as sp
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_update_rule from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_update_rule
...@@ -32,7 +32,7 @@ from lbmpy_walberla import generate_lattice_model ...@@ -32,7 +32,7 @@ from lbmpy_walberla import generate_lattice_model
\endcode \endcode
First, we define a few general parameters. These include the stencil (D2Q9) and the memory layout (`fzyx`, see \ref tutorial_codegen01 ). We define a SymPy symbol for the relaxation rate \f$ \omega \f$. This means we can later set it to a specific value from the waLBerla code. A dictionary with optimization parameters is also set up. Here, we enable global common subexpression elimination (`cse_global`) and set the PDF field's memory layout. First, we define a few general parameters. These include the stencil (D2Q9) and the memory layout (`fzyx`, see \ref tutorial_codegen01 ). We define a SymPy symbol for the relaxation rate \f$ \omega \f$. This means we can later set it to a specific value from the waLBerla code. A dictionary with optimization parameters is also set up. Here, we enable global common subexpression elimination (`cse_global`) and set the PDF field's memory layout.
\code \code{.py}
stencil = 'D2Q9' stencil = 'D2Q9'
omega = sp.Symbol('omega') omega = sp.Symbol('omega')
layout = 'fzyx' layout = 'fzyx'
...@@ -45,7 +45,7 @@ Next, we set the parameters for the SRT method in a dictionary and create both t ...@@ -45,7 +45,7 @@ Next, we set the parameters for the SRT method in a dictionary and create both t
The update rule is still needed in the code generation process; namely for the pack info generation. The collision step only acts within one cell. Thus, the collision rule's equations contain no neighbour accesses. Calling `create_lb_update_rule` inserts the two-fields pull scheme as `generate_lattice_model`, and resulting update rule contains exactly those neighbour accesses which are required for `generate_pack_info_from_kernel` to build the optimized pack info. The update rule is still needed in the code generation process; namely for the pack info generation. The collision step only acts within one cell. Thus, the collision rule's equations contain no neighbour accesses. Calling `create_lb_update_rule` inserts the two-fields pull scheme as `generate_lattice_model`, and resulting update rule contains exactly those neighbour accesses which are required for `generate_pack_info_from_kernel` to build the optimized pack info.
\code \code{.py}
srt_params = {'stencil': stencil, srt_params = {'stencil': stencil,
'method': 'srt', 'method': 'srt',
'relaxation_rate': omega} 'relaxation_rate': omega}
...@@ -56,7 +56,7 @@ srt_update_rule = create_lb_update_rule(collision_rule=srt_collision_rule, optim ...@@ -56,7 +56,7 @@ srt_update_rule = create_lb_update_rule(collision_rule=srt_collision_rule, optim
Finally, we create the code generation context and call the respective functions for generating the lattice model and the pack info. Both require the context and a class name as parameters. To `generate_lattice_model`, we also pass the collision rule and the field layout; `generate_pack_info_from_kernel` receives the update rule. Finally, we create the code generation context and call the respective functions for generating the lattice model and the pack info. Both require the context and a class name as parameters. To `generate_lattice_model`, we also pass the collision rule and the field layout; `generate_pack_info_from_kernel` receives the update rule.
\code \code{.py}
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
generate_lattice_model(ctx, "SRTLatticeModel", srt_collision_rule, field_layout=layout) generate_lattice_model(ctx, "SRTLatticeModel", srt_collision_rule, field_layout=layout)
generate_pack_info_from_kernel(ctx, "SRTPackInfo", srt_update_rule) generate_pack_info_from_kernel(ctx, "SRTPackInfo", srt_update_rule)
...@@ -68,7 +68,7 @@ Furthermore, if we optimise the waLBerla for the machine, it is compiled on with ...@@ -68,7 +68,7 @@ Furthermore, if we optimise the waLBerla for the machine, it is compiled on with
As a final touch, we still need to set up the CMake build target for the code generation script. This time, two distinct classes (the lattice model and the pack information) will be generated. Therefore, we need to list the header and source file names for both classes separately. As a final touch, we still need to set up the CMake build target for the code generation script. This time, two distinct classes (the lattice model and the pack information) will be generated. Therefore, we need to list the header and source file names for both classes separately.
\code \code{.unparsed}
walberla_generate_target_from_python( NAME 02_LBMLatticeModelGenerationPython walberla_generate_target_from_python( NAME 02_LBMLatticeModelGenerationPython
FILE 02_LBMLatticeModelGeneration.py FILE 02_LBMLatticeModelGeneration.py
OUT_FILES SRTLatticeModel.cpp SRTLatticeModel.h OUT_FILES SRTLatticeModel.cpp SRTLatticeModel.h
......
...@@ -3,7 +3,7 @@ namespace walberla{ ...@@ -3,7 +3,7 @@ namespace walberla{
/** /**
\page tutorial_codegen03 Tutorial - Code Generation 3: Advanced LBM Code Generation \page tutorial_codegen03 Tutorial - Code Generation 3: Advanced LBM Code Generation
\section overview Overview \section advancedlbmcodegen_overview Overview
This tutorial demonstrates how to use [pystencils](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and [lbmpy](https://pycodegen.pages.i10git.cs.fau.de/lbmpy) to generate highly optimised and hardware-specific Lattice Boltzmann simulation code within the waLBerla framework. Other than in \ref tutorial_codegen02, we will be generating a full LBM sweep instead of a lattice model class. Furthermore, we will generate a communication pack info class and a sweep to initialise the PDF field. A hardware-specific implementation of a NoSlip boundary handler will also be generated. Those components will then be combined in a waLBerla application for simulating the same shear flow scenario as in the previous tutorial. This tutorial demonstrates how to use [pystencils](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and [lbmpy](https://pycodegen.pages.i10git.cs.fau.de/lbmpy) to generate highly optimised and hardware-specific Lattice Boltzmann simulation code within the waLBerla framework. Other than in \ref tutorial_codegen02, we will be generating a full LBM sweep instead of a lattice model class. Furthermore, we will generate a communication pack info class and a sweep to initialise the PDF field. A hardware-specific implementation of a NoSlip boundary handler will also be generated. Those components will then be combined in a waLBerla application for simulating the same shear flow scenario as in the previous tutorial.
...@@ -19,7 +19,7 @@ For the stream-pull-collide type kernel, we need two PDF fields which we set up ...@@ -19,7 +19,7 @@ For the stream-pull-collide type kernel, we need two PDF fields which we set up
For VTK output and the initial velocity setup, we define a velocity vector field as an output field for the LB method. For VTK output and the initial velocity setup, we define a velocity vector field as an output field for the LB method.
\code{.py}
stencil = 'D2Q9' stencil = 'D2Q9'
omega = sp.Symbol('omega') omega = sp.Symbol('omega')
layout = 'fzyx' layout = 'fzyx'
...@@ -40,7 +40,7 @@ optimization = {'cse_global': True, ...@@ -40,7 +40,7 @@ optimization = {'cse_global': True,
We set up the cumulant-based MRT method with relaxation rates as described above. We use `generate_lb_update_rule` from lbmpy to derive the set of equations describing the collision operator together with the *pull* streaming pattern. These equations define the entire LBM sweep. We set up the cumulant-based MRT method with relaxation rates as described above. We use `generate_lb_update_rule` from lbmpy to derive the set of equations describing the collision operator together with the *pull* streaming pattern. These equations define the entire LBM sweep.
\code{.py}
lbm_params = {'stencil': stencil, lbm_params = {'stencil': stencil,
'method': 'mrt_raw', 'method': 'mrt_raw',
'relaxation_rates': [0, 0, 0, omega, omega, omega, 1, 1, 1], 'relaxation_rates': [0, 0, 0, omega, omega, omega, 1, 1, 1],
...@@ -56,7 +56,7 @@ lbm_method = lbm_update_rule.method ...@@ -56,7 +56,7 @@ lbm_method = lbm_update_rule.method
In \ref tutorial_codegen02, we were able to use the framework built around the waLBerla lattice model template API for setting up the shear flow's initial velocity profile. Since we are not using a lattice model class this time, this API is not available to us. With lbmpy, though, we can generate a kernel which takes in scalar values or fields for the initial density and velocity and sets the initial PDF values to the corresponding equilibrium. The function `macroscopic_values_setter` from `lbmpy.macroscopic_value_kernels` returns a set of assignments for this initialization procedure. It takes the LB method definition as an argument, as well as either symbols or pystencils field accesses for the initial density `rho` and the initial velocity. Lastly, it takes the PDF field's centre vector as the destination for the PDF values. We define a separate symbol for the density and use the velocity field defined above. In \ref tutorial_codegen02, we were able to use the framework built around the waLBerla lattice model template API for setting up the shear flow's initial velocity profile. Since we are not using a lattice model class this time, this API is not available to us. With lbmpy, though, we can generate a kernel which takes in scalar values or fields for the initial density and velocity and sets the initial PDF values to the corresponding equilibrium. The function `macroscopic_values_setter` from `lbmpy.macroscopic_value_kernels` returns a set of assignments for this initialization procedure. It takes the LB method definition as an argument, as well as either symbols or pystencils field accesses for the initial density `rho` and the initial velocity. Lastly, it takes the PDF field's centre vector as the destination for the PDF values. We define a separate symbol for the density and use the velocity field defined above.
\code{.py}
initial_rho = sp.Symbol('rho_0') initial_rho = sp.Symbol('rho_0')
pdfs_setter = macroscopic_values_setter(lbm_method, pdfs_setter = macroscopic_values_setter(lbm_method,
...@@ -74,7 +74,7 @@ Several functions from `pystencils_walberla` and `lbmpy_walberla` are called to ...@@ -74,7 +74,7 @@ Several functions from `pystencils_walberla` and `lbmpy_walberla` are called to
- The PDF initialization kernel is generated from the `pdfs_setter` assignment collection using `generate_sweep`. - The PDF initialization kernel is generated from the `pdfs_setter` assignment collection using `generate_sweep`.
- Using `generate_boundary`, we generate an optimised implementation of a NoSlip boundary handler for the domain's walls. - Using `generate_boundary`, we generate an optimised implementation of a NoSlip boundary handler for the domain's walls.
\code{.py}
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
if ctx.cuda: if ctx.cuda:
target = 'gpu' target = 'gpu'
...@@ -104,7 +104,7 @@ We will now integrate the generated classes into a waLBerla application. After a ...@@ -104,7 +104,7 @@ We will now integrate the generated classes into a waLBerla application. After a
#include "CumulantMRTNoSlip.h" #include "CumulantMRTNoSlip.h"
#include "CumulantMRTPackInfo.h" #include "CumulantMRTPackInfo.h"
#include "CumulantMRTSweep.h" #include "CumulantMRTSweep.h"
#include "InitialPDFsSetter.h"
\endcode \endcode
We set up typedef aliases for the generated pack info and the D2Q9 stencil. For the PDF and velocity fields, we use instances of the field::GhostLayerField template. The number of entries of the PDF field is specified by the `Stencil_T::Size` parameter. As our domain is two-dimensional, the velocity at each lattice node is a two-dimensional vector. Thus, we set up the velocity field to have two index dimensions passing the stencil's dimension as a template parameter. Finally, we also define a typedef alias for our generated NoSlip boundary. We set up typedef aliases for the generated pack info and the D2Q9 stencil. For the PDF and velocity fields, we use instances of the field::GhostLayerField template. The number of entries of the PDF field is specified by the `Stencil_T::Size` parameter. As our domain is two-dimensional, the velocity at each lattice node is a two-dimensional vector. Thus, we set up the velocity field to have two index dimensions passing the stencil's dimension as a template parameter. Finally, we also define a typedef alias for our generated NoSlip boundary.
......
...@@ -134,11 +134,11 @@ ...@@ -134,11 +134,11 @@
AssignmentCollection: u_tmp_C, <- f(u_S, u_N, u_E, u_W, dt, u_C, kappa, dx) AssignmentCollection: u_tmp_C, <- f(u_S, u_N, u_E, u_W, dt, u_C, kappa, dx)
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Our numeric solver's symbolic representation is now complete! Next, we use pystencils to generate and compile a C implementation of our kernel. The code is generated as shown below, compiled into a shared library and then bound to `kernel_func`. All unbound sympy symbols (`dx`, `dt` and `kappa`) as well as the fields `u` and `u_tmp` are arguments to the generated kernel function.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
kernel_ast = ps.create_kernel(update, cpu_openmp = 4) kernel_ast = ps.create_kernel(update, cpu_openmp = 4)
...@@ -155,11 +155,11 @@ ...@@ -155,11 +155,11 @@
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Prototype Simulation ### Prototype Simulation
We can set up and run a simple simulation with the generated kernel right here. The first step is to set up the fields and simulation parameters.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
domain_size = 1.0 domain_size = 1.0
......
...@@ -130,7 +130,7 @@ CommScheme communication( blocks ); ...@@ -130,7 +130,7 @@ CommScheme communication( blocks );
communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) ); communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) );
\endcode \endcode
This scheme also supports heterogeneous simulations, i.e. using a CPU field on
some processes and a GPU field on other processes.
*/ */
......
...@@ -5,7 +5,7 @@ namespace walberla { ...@@ -5,7 +5,7 @@ namespace walberla {