From 690183aa39d7fef07126162e1baa21ebeb98e663 Mon Sep 17 00:00:00 2001
From: markus holzer <markus.holzer@fau.de>
Date: Tue, 15 Sep 2020 20:45:38 +0200
Subject: [PATCH] Updated tutorial 02

---
 .../codegen/02_LBMLatticeModelGeneration.cpp  |  1 -
 .../codegen/02_LBMLatticeModelGeneration.dox  | 64 +++++++++----------
 .../codegen/02_LBMLatticeModelGeneration.prm  |  4 +-
 .../codegen/02_LBMLatticeModelGeneration.py   | 22 ++++---
 4 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.cpp b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.cpp
index af5346046..934ab64ff 100644
--- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.cpp
+++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.cpp
@@ -29,7 +29,6 @@
 #include "geometry/all.h"
 
 #include "lbm/boundary/factories/DefaultBoundaryHandling.h"
-#include "lbm/communication/PdfFieldPackInfo.h"
 #include "lbm/field/AddToStorage.h"
 #include "lbm/field/PdfField.h"
 #include "lbm/field/initializer/all.h"
diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
index d67b6e8f0..781378aec 100644
--- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
+++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
@@ -5,23 +5,22 @@ namespace walberla{
 
 \section overview Overview
 
-This tutorial demonstrates how to use <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">lbmpy</a> in conjunction with waLBerla to generate efficient implementations of various Lattice Boltzmann Methods to be included in large-scale distributed memory fluid flow simulations. While waLBerla provides an advanced framework for setting up and running complex fluid simulations, it only implements a small set of Lattice Boltzmann collision operators. Writing an efficient C++ implementation of an advanced Lattice-Boltzmann method can be very cumbersome. For this reason, lbmpy was developed. It is a Python framework which allows to define a set of LB equations at different levels of abstraction, and then generate a highly optimized C implementation of these equations. An introduction to lbmpy can be found <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">here</a>. 
-
+This tutorial demonstrates how to use <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">lbmpy</a> with waLBerla to generate efficient implementations of various Lattice Boltzmann Methods (LBM) to be included in large-scale distributed memory fluid flow simulations. While waLBerla provides an advanced framework for setting up and running complex fluid simulations, lbmpy brings the flexibility to generate highly optimized LBMs for CPUs and GPUs. Manually writing an efficient C++ or GPU implementation of an advanced LBM can be very cumbersome, especially because the compute-intensive kernel needs to be optimized for specific hardware. For this reason, lbmpy was developed. It is a Python framework which allows defining a set of LB equations at an abstract level, so that one can work directly on the mathematical description of the problem, and which then generates a highly optimized C, OpenCL or CUDA implementation of these equations. An introduction to lbmpy can be found <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">here</a>.
 
 As in the previous tutorial (\ref tutorial_codegen01), we will first define the numeric methods and set up code generation in a Python script. We will then include the generated code in a waLBerla application to simulate a two-dimensional shear flow in a periodic domain. Additionally, it will be shown how a waLBerla simulation with complex initial conditions can be initialized using parameter files.
 
 \section lbmpy_codegen_script Setting up code generation in Python
 
-In this section, we will define an SRT collision operator in Python using lbmpy and generate a waLBerla lattice model implementation from its equations. A lattice model defines several things, including:
+In this section, we will define a single relaxation time (SRT) collision operator in Python using lbmpy and generate a waLBerla lattice model implementation from its equations. A lattice model defines several things, including:
 - The **velocity set** together with its weights. For our two-dimensional domain, we will be using the D2Q9 velocity set.
 - The **LBM sweep** which consists of the collision operator and the streaming pattern. We will be using a standard pull-scheme with a temporary field.
 - The **force model**. Since no forces will be present in our simulation, we will not use a force model in this tutorial. For details about force models, see <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/sphinx/forcemodels.html" target="_blank">lbmpy's documentation on force models</a>.
 
 In addition to the lattice model, it will be shown how to generate a method-specific pack info class for MPI communication to reduce communication load to the necessary minimum. 
 
-In the code generation python script, we first require a few imports from lbmpy itself and from the waLBerla code generation libraries. lbmpy code generation is based on pystencils; the basic procedure is thus the same as in the previous tutorial. We need the `CodeGeneration` context from the `pystencils_walberla` module for the connection to the build system. For generating the communication pack info, we will use `generate_pack_info_from_kernel` from the same module. This method of pack info generation is not limited to LBM implementations, but can be used with any kind of sweep kernel. The function `generate_pack_info_from_kernel` simply takes a pystencils `AssignmentCollection` and extracts all field accesses to determine which cell entries need to be communicated.
+In the code generation python script, we first require a few imports from lbmpy itself and from the waLBerla code generation libraries. lbmpy code generation is based on pystencils; the basic procedure is thus the same as in the previous tutorial. We need the `CodeGeneration` context from the `pystencils_walberla` module for the connection to the build system. For generating the communication pack info, we will use `generate_pack_info_from_kernel` from the same module. This method of pack info generation is not limited to LBM implementations but can be used with any sweep kernel. The function `generate_pack_info_from_kernel` takes a pystencils `AssignmentCollection` and extracts all field accesses to determine which cell entries need to be communicated.
 
-From the `lbmpy.creationfunctions` module we require the functions to create collision and update rules. For the actual code generation, `generate_lattice_model` from `lbmpy_walberla` is required. Since we will define symbols, `sympy` is also needed.
+From the `lbmpy.creationfunctions` module we require the functions to create collision and update rules. For the actual code generation, `generate_lattice_model` from `lbmpy_walberla` is required. Since we will define symbols, `SymPy` is also needed.
 
 \code
 import sympy as sp
@@ -32,54 +31,53 @@ from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel
 from lbmpy_walberla import generate_lattice_model
 \endcode
 
-First, we define a few general parameters. These include the stencil (D2Q9) and the memory layout (`fzyx`, see \ref tutorial_codegen01 ). We define a sympy symbol for the relaxation rate $\omega$ since it will later be set from C++ code. A dictionary with optimization parameters is also set up. Here, we define the compilation target, enable global common subexpression elimination (`cse_global`) and set the PDF field's memory layout. In general, the target could be set to `gpu` to create a CUDA implementation, but this is currently not possible for lattice models.
-
+First, we define a few general parameters. These include the stencil (D2Q9) and the memory layout (`fzyx`, see \ref tutorial_codegen01 ). We define a SymPy symbol for the relaxation rate $\omega$. This means we can later set it to a specific value from the waLBerla code. A dictionary with optimization parameters is also set up. Here, we define the compilation target, enable global common subexpression elimination (`cse_global`) and set the PDF field's memory layout. In general, the target could be set to `gpu` to create a CUDA implementation, but this is currently not possible for creating a `LatticeModel` class.
 \code
-STENCIL = 'D2Q9'
-OMEGA = sp.Symbol('omega')
-LAYOUT = 'fzyx'
+stencil = 'D2Q9'
+omega = sp.Symbol('omega')
+layout = 'fzyx'
 
 #   Optimization
-OPT = {'target': 'cpu', 'cse_global': True, 'field_layout': LAYOUT}
+optimizations = {'target': 'cpu', 'cse_global': True, 'field_layout': layout}
 \endcode
 
-Next, we set the parameters for the SRT method in a dictionary and create both the collision and update rules by calling the respective lbmpy functions. They both return an `AssignmentCollection` containing all necessary equations. The only parameters needed for SRT are the stencil and the relaxation rate. For generating the lattice model, we only require the collision rule's equations since `generate_lattice_model` adds the two-fields pull scheme for the streaming step internally. In fact, lattice model generation is limited to this standard streaming pattern. 
+Next, we set the parameters for the SRT method in a dictionary and create both the collision and update rules by calling the respective lbmpy functions. They both return an `AssignmentCollection` containing all necessary equations. The only parameters needed for SRT are the stencil and the relaxation rate. For generating the lattice model, we only require the collision rule's equations since `generate_lattice_model` adds the two-fields pull scheme for the streaming step internally. At this point, the lattice model generation is limited to the standard stream-pull-collide scheme.
 
-The update rule is still needed in the code generation process; namely for the pack info generation. The collision step only acts within one cell. Thus, the collision rule's equations contain no neighbour accesses. Calling `create_lb_update_rule` extends this by the same two-fields pull scheme as `generate_lattice_model`, and resulting update rule contains exactly those neighbour accesses which are required for `generate_pack_info_from_kernel` to build the optimized pack info.
+The update rule is still needed in the code generation process; namely for the pack info generation. The collision step only acts within one cell. Thus, the collision rule's equations contain no neighbour accesses. Calling `create_lb_update_rule` inserts the same two-fields pull scheme as `generate_lattice_model` does, and the resulting update rule contains exactly those neighbour accesses which are required for `generate_pack_info_from_kernel` to build the optimized pack info.
 
 \code
-srt_params = {'stencil': STENCIL,
+srt_params = {'stencil': stencil,
               'method': 'srt',
-              'relaxation_rate': OMEGA}
+              'relaxation_rate': omega}
 
-srt_collision_rule = create_lb_collision_rule(optimization=OPT, **srt_params)
-srt_update_rule = create_lb_update_rule(collision_rule=srt_collision_rule, optimization=OPT)
+srt_collision_rule = create_lb_collision_rule(optimization=optimizations, **srt_params)
+srt_update_rule = create_lb_update_rule(collision_rule=srt_collision_rule, optimization=optimizations)
 \endcode
 
-Finally, we retrieve the code generation context and call the respective functions for generating the lattice model and the pack info. Both require the context and a class name as parameters. To  `generate_lattice_model`, we also pass the collision rule and the field layout; `generate_pack_info_from_kernel` in turn gets the update rule. 
+Finally, we create the code generation context and call the respective functions for generating the lattice model and the pack info. Both require the context and a class name as parameters. To `generate_lattice_model`, we also pass the collision rule and the field layout; `generate_pack_info_from_kernel` receives the update rule.
 
 \code
 with CodeGeneration() as ctx:
-    generate_lattice_model(ctx, "SRTLatticeModel", srt_collision_rule, field_layout=LAYOUT)
+    generate_lattice_model(ctx, "SRTLatticeModel", srt_collision_rule, field_layout=layout)
     generate_pack_info_from_kernel(ctx, "SRTPackInfo", srt_update_rule)
 \endcode
 
 Notice that, other than in \ref tutorial_codegen01, we did not need to define any fields. Both the source and destination PDF fields are created internally by lbmpy and `generate_lattice_model`. 
 
-As a final touch, we still need to set up the CMake build target for the code generation script. This time, two distinct classes will be generated. Therefore we need to list the header and source file names for both classes separately.
+As a final touch, we still need to set up the CMake build target for the code generation script. This time, two distinct classes (the lattice model and the pack information) will be generated. Therefore, we need to list the header and source file names for both classes separately.
 
 \code
 walberla_generate_target_from_python( NAME 02_LBMLatticeModelGenerationPython
-        FILE 02_LBMLatticeModelGeneration.py
-        OUT_FILES   SRTLatticeModel.cpp SRTLatticeModel.h 
-                    SRTPackInfo.cpp SRTPackInfo.h )
+                                      FILE 02_LBMLatticeModelGeneration.py
+                                      OUT_FILES SRTLatticeModel.cpp SRTLatticeModel.h
+                                                SRTPackInfo.cpp SRTPackInfo.h )
 \endcode
 
 This completes the code generation part.
 
 \section lbmpy_simulation_app The Simulation application
 
-This section is concerned with the implementation of a waLBerla C++ application for the simulation of a periodic shear flow using the generated SRT lattice model and pack info. After adding the code generation target defined above as a dependency to the application's CMake target, we can straightforwardly include both generated classes:
+This section is concerned with the implementation of a waLBerla C++ application for the simulation of a periodic shear flow using the generated SRT lattice model and pack info. After adding the code generation target defined above as a dependency to the application's CMake target, we can include both generated classes:
 
 \code
 #include "SRTLatticeModel.h"
@@ -112,7 +110,7 @@ The application will be mostly similar to \ref tutorial_lbm01, so we will mainly
 
 \subsection lbmpy_add_lattice_model_sweep Adding the generated sweep
 
-The only major difference caused by the used of a generated lattice model is when the LBM sweep is added to the timeloop. The sweep is exposed as a static member of the generated lattice model class and can be added like this:
+The only significant difference caused by the usage of a generated lattice model is when the LBM sweep is added to the timeloop. The sweep is exposed as a static member of the generated lattice model class and can be added like this:
 
 \code
 timeloop.add() << Sweep(LatticeModel_T::Sweep(pdfFieldId), "LB stream & collide");
@@ -122,9 +120,9 @@ The remaining extensions concern only the setup of boundaries and the initial ve
 
 \subsection lbmpy_shear_flow_init Setup of the Shear Flow Scenario
 
-We will set up a shear flow scenario in a rectangular, two-dimensional domain which is periodic in the x-direction and limited by NoSlip-boundaries (i.e. walls) to the north and south. The fluid will be moving to the right in the northern and southern parts of the domain, and to the right in the middle with the same speed. Its velocity in the y-direction will not be initially zero, but slightly perturbed by random noise. This will cause the development of vortices between the shear layers during the simulation.
+We will set up a shear flow scenario in a rectangular, two-dimensional domain which is periodic in the x-direction and limited by NoSlip-boundaries (i.e. walls) to the north and south. The fluid will be moving rightwards in the northern and southern parts of the domain, and leftwards in the middle with the same speed. Its velocity in the y-direction will be slightly perturbed by random noise, which will cause the development of vortices between the shear layers during the simulation.
 
-For simplicity, the boundaries are set up using the lbm::DefaultBoundaryHandlingFactory as described in \ref tutorial_lbm01. Other than there, no pressure or velocity boundary conditions exist; therefore the declaration simplifies to:
+For simplicity, the boundaries are set up using the lbm::DefaultBoundaryHandlingFactory as described in \ref tutorial_lbm01.
 
 \code
 BlockDataID boundaryHandlingId =
@@ -132,7 +130,7 @@ BlockDataID boundaryHandlingId =
                                               Vector3< real_t >(), Vector3< real_t >(), real_c(0.0), real_c(0.0));
 \endcode
 
-In the parameter file, the boundary block thus only defines the NoSlip boundaries. Also, in the `DomainSetup` block, the domain needs to be periodic in the x-direction.
+In the parameter file, the boundary block only defines the NoSlip boundaries. Also, in the `DomainSetup` block, the domain needs to be periodic in the x-direction.
 
 \code
 DomainSetup
@@ -150,9 +148,9 @@ Boundaries
 }
 \endcode
 
-The most substantial extension we are making is the velocity initialization. We will be using the lbm::initializer::PdfFieldInitializer class in conjunction with lbm::initializer::ExprSystemInitFunction which is capable of parsing mathematical expressions from the parameter file to set up complex initial flows. For this purpose, waLBerla uses the [exprtk](http://www.partow.net/programming/exprtk/index.html) C++ library. We will need to extend this functionality a little to introduce the random noise.
+The velocity initialization can be defined directly in the parameter file. We will be using the lbm::initializer::PdfFieldInitializer class with lbm::initializer::ExprSystemInitFunction which is capable of parsing mathematical expressions from the parameter file to set up complex initial flows. For this purpose, waLBerla uses the [exprtk](http://www.partow.net/programming/exprtk/index.html) C++ library. We will need to extend this functionality a little to introduce the random noise.
 
-The PdfFieldInitializer's initDensityAndVelocity function expects a function of type `std::vector< real_t > (const Cell&)`. This function will receive a `Cell` with global coordinates and should return a `std::vector` with four entries: One density and three cartesian velocity components for the given cell. For this purpose, we create a functor struct with these members:
+The PdfFieldInitializer's `initDensityAndVelocity` function expects a function of type `std::vector< real_t > (const Cell&)`. This function will receive a `Cell` with global coordinates and should return a `std::vector` with four entries: one density and three Cartesian velocity components for the given cell. For this purpose, we create a functor struct with these members:
 
 - An instance `exprInitFunc_` of lbm::initializer::ExprSystemInitFunction for initializing the x-velocities;
 - A random number generator `rng_` for the y-velocities, which is an instance of walberla::math::RealRandom;
@@ -189,9 +187,9 @@ struct ShearFlowInit
 };
 \endcode
 
-All the required parameters and the expressions for initializing the density and velocity are defined in the parameter file, inside the block `ShearFlowSetup`. For `rho`, `u_x`, `u_y` and `u_z`, mathematical expressions can be specified which may include the variables `x`, `y`, `z` for a cell's global position and `n_x`, `n_y`, `n_z` which hold the number of cells in each direction. These expressions will then be evaluated for each domain cell.
+The required parameters and expressions for initializing the density and velocity are defined in the parameter file. The block is called `ShearFlowSetup`. For `rho`, `u_x`, `u_y` and `u_z`, mathematical expressions can be specified which may include the variables `x`, `y`, `z` for a cell's global position and `n_x`, `n_y`, `n_z` representing the number of cells in each direction. These expressions will be evaluated for each domain cell.
 
-A seed for the random number generator is also specified, controlling the random noise and making it reproducible.
+A seed for the random number generator is also specified, which controls the random noise and makes the test case reproducible.
 
 \code
 ShearFlowSetup
@@ -211,7 +209,7 @@ This completes the C++ implementation. It will produce VTK files which can then
 
 \section lbmpy_lattice_model_outlook Outlook
 
-Although generated lattice models are easy to define and easy to integrate with the waLBerla framework, they are somewhat limited in features. For example, the waLBerla code generation methods for lattice models do not allow advanced streaming patterns, or the generation of CUDA code. This is remedied by generating sweeps using `generate_sweep` directly instead of `generate_lattice_model`. The result is a generic waLBerla sweep which is not consistent with the lattice model API, thus making implementations a little more complicated since many class templates created for the use with lattice models can not be used. For this problem too, code generation offers solutions which will be further explained in the next tutorial. 
+Although generated lattice models are easy to define and easy to integrate with the waLBerla framework, they are somewhat limited in features. For example, the waLBerla code generation methods for lattice models do not allow advanced streaming patterns or the generation of CUDA code. This is remedied by generating sweeps using `generate_sweep` directly instead of `generate_lattice_model`. The result is a generic waLBerla sweep which is not consistent with the lattice model API, thus making implementations a little more complicated since many class templates created for the use with lattice models can not be used. For this problem too, code generation offers solutions which will be further explained in the next tutorial.
 
 \tableofcontents
 
diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.prm b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.prm
index 5df6b7e31..004374b23 100644
--- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.prm
+++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.prm
@@ -1,7 +1,7 @@
 
 Parameters 
 {
-   omega           1.8;
+    omega           1.8;
 	timesteps       10000;
 
 	remainingTimeLoggerFrequency 3; // in seconds
@@ -35,7 +35,7 @@ StabilityChecker
 
 Boundaries 
 {   
-	Border { direction S,N; walldistance -1; NoSlip {} }		
+	Border { direction S, N; walldistance -1; NoSlip {} }
 }
 
 
diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.py b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.py
index c9c7e4982..97dd55b37 100644
--- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.py
+++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.py
@@ -9,28 +9,30 @@ from lbmpy_walberla import generate_lattice_model
 #      General Parameters
 #   ========================
 
-STENCIL = 'D2Q9'
-OMEGA = sp.Symbol('omega')
-LAYOUT = 'fzyx'
+stencil = 'D2Q9'
+omega = sp.Symbol('omega')
+layout = 'fzyx'
 
-#   Optimization
-OPT = {'target': 'cpu', 'cse_global': True, 'field_layout': LAYOUT}
+#   Optimizations to be used by the code generator
+optimizations = {'target': 'cpu', 'cse_global': True, 'field_layout': layout}
 
 #   ===========================
 #      SRT Method Definition
 #   ===========================
 
-srt_params = {'stencil': STENCIL,
+srt_params = {'stencil': stencil,
               'method': 'srt',
-              'relaxation_rate': OMEGA}
+              'relaxation_rate': omega}
 
-srt_collision_rule = create_lb_collision_rule(optimization=OPT, **srt_params)
-srt_update_rule = create_lb_update_rule(collision_rule=srt_collision_rule, optimization=OPT)
+srt_collision_rule = create_lb_collision_rule(optimization=optimizations, **srt_params)
+srt_update_rule = create_lb_update_rule(collision_rule=srt_collision_rule, optimization=optimizations)
 
 #   =====================
 #      Code Generation
 #   =====================
 
 with CodeGeneration() as ctx:
-    generate_lattice_model(ctx, "SRTLatticeModel", srt_collision_rule, field_layout=LAYOUT)
+    # generation of the lattice model ...
+    generate_lattice_model(ctx, "SRTLatticeModel", srt_collision_rule, field_layout=layout)
+    # ... and generation of the pack information to be used for the MPI communication
     generate_pack_info_from_kernel(ctx, "SRTPackInfo", srt_update_rule)
-- 
GitLab