diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
index 7e7afaac16df46dc27249ef46bb0d93f50ac2d16..e02a8715aaba5a94101d9c08368233b5883a05de 100644
--- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
+++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
@@ -5,11 +5,11 @@ namespace walberla{
 
 \section overview Overview
 
-This tutorial demonstrates how to use <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">lbmpy</a> with waLBerla to generate efficient implementations of Lattice Boltzmann Methods (LBM) to be included in large-scale distributed memory fluid flow simulations. While waLBerla provides an advanced framework for setting up and running complex fluid simulations, lbmpy brings the flexibility to generate highly optimized LBMs for CPUs and GPUs. Manually writing an efficient C++ or GPU implementation of an advanced LBM can be very cumbersome, especially because the compute kernel needs to be optimized for specific hardware. For this reason, lbmpy was developed. It is a Python framework which allows defining a set of LB equations at an abstract level, thus enabling development on the mathematical description of the problem directly and then generate a highly optimized C, OpenCL or CUDA implementation of these equations. An introduction to lbmpy can be found <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">here</a>.
+This tutorial demonstrates how to use <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy" target="_blank">lbmpy</a> with waLBerla to generate efficient implementations of Lattice Boltzmann Methods (LBM) to be included in large-scale distributed memory fluid flow simulations. While waLBerla provides an advanced framework for setting up and running complex fluid simulations, lbmpy brings the flexibility to generate highly optimized LBMs for CPUs and GPUs. Manually writing an efficient C++ or GPU implementation of an advanced LBM can be very cumbersome, especially because the compute kernel needs to be optimized for specific hardware. For this reason, lbmpy was developed. It is a Python framework which allows defining a set of LB equations at an abstract level, thus enabling development on the mathematical description of the problem directly and then generate a highly optimized C, OpenCL or CUDA implementation of these equations. An introduction to lbmpy can be found <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/notebooks/00_tutorial_lbmpy_walberla_overview.html" target="_blank">here</a>.
 
 As in the previous tutorial (\ref tutorial_codegen01), we will first define the numeric methods and set up code generation in a Python script. We will then include the generated code in a waLBerla application to simulate a two-dimensional shear flow in a periodic domain. Additionally, it will be shown how a waLBerla simulation with complex initial conditions can be initialized using parameter files.
 
-\section lbmpy_codegen_script Setting up code generation in Python
+\section latticemodelcodegen_python Setting up code generation in Python
 
 In this section, we will define a single relaxation time (SRT) collision operator in Python using lbmpy and generate a waLBerla lattice model implementation from its equations. A lattice model defines several things, including:
 - The **velocity set** together with its weights. For our two-dimensional domain, we will be using the D2Q9 velocity set.
diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp b/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp
index 011c68fae1f0f62b6a0ccf243d471404cd19c455..44ad274c6580fa6b86960766492c91bae541638d 100644
--- a/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp
+++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp
@@ -25,11 +25,10 @@
 #include "domain_decomposition/all.h"
 
 #include "field/all.h"
+#include "field/vtk/VTKWriter.h"
 
 #include "geometry/all.h"
 
-#include "lbm/boundary/factories/DefaultBoundaryHandling.h"
-#include "field/vtk/VTKWriter.h"
 #include "lbm/vtk/VTKOutput.h"
 
 #include "stencil/D2Q9.h"
@@ -40,7 +39,7 @@
 #include "CumulantMRTNoSlip.h"
 #include "CumulantMRTPackInfo.h"
 #include "CumulantMRTSweep.h"
-#include "DensityAndVelocityFieldSetter.h"
+#include "InitialPDFsSetter.h"
 
 namespace walberla
 {
@@ -69,7 +68,8 @@ typedef lbm::CumulantMRTNoSlip NoSlip_T;
 /// Shear Flow Velocity Initialization ///
 //////////////////////////////////////////
 
-void initShearFlowVelocityField(const shared_ptr< StructuredBlockForest >& blocks, const BlockDataID& velocityFieldId,
+void initShearFlowVelocityField(const shared_ptr< StructuredBlockForest >& blocks, 
+                                const BlockDataID& velocityFieldId,
                                 const Config::BlockHandle& config)
 {
    math::RealRandom< real_t > rng(config.getParameter< std::mt19937::result_type >("noiseSeed", 42));
@@ -97,47 +97,6 @@ void initShearFlowVelocityField(const shared_ptr< StructuredBlockForest >& block
    }
 }
 
-////////////////////////
-/// VTK Output Setup ///
-////////////////////////
-
-struct VTKOutputSetup
-{
- private:
-   const ConstBlockDataID velocityFieldId_;
-   const ConstBlockDataID flagFieldId_;
-   const FlagUID& domainFlag_;
-
- public:
-   VTKOutputSetup(const BlockDataID& velocityFieldId, const BlockDataID& flagFieldId, const FlagUID& domainFlag)
-      : velocityFieldId_(velocityFieldId), flagFieldId_(flagFieldId), domainFlag_(domainFlag)
-   {}
-
-   void operator()(std::vector< shared_ptr< vtk::BlockCellDataWriterInterface > >& writers,
-                   std::map< std::string, vtk::VTKOutput::CellFilter >& filters,
-                   std::map< std::string, vtk::VTKOutput::BeforeFunction >& /*beforeFunctions*/) const
-   {
-      // Add VTK writers for velocity and velocity magnitude
-      writers.push_back(make_shared< field::VTKWriter< VectorField_T, float > >(velocityFieldId_, "Velocity"));
-
-      // Add domain cell filter
-      filters["DomainFilter"] = field::FlagFieldCellFilter< FlagField_T >(flagFieldId_, domainFlag_);
-   }
-
-   void initializeAndAdd(SweepTimeloop& timeloop, const shared_ptr< StructuredBlockForest >& blocks,
-                         const shared_ptr<Config> & config)
-   {
-      std::map< std::string, vtk::SelectableOutputFunction > vtkOutputFunctions;
-      vtk::initializeVTKOutput(vtkOutputFunctions, *this, blocks, config, "VTK");
-
-      for (auto funcIt = vtkOutputFunctions.begin(); funcIt != vtkOutputFunctions.end(); ++funcIt)
-      {
-         timeloop.addFuncBeforeTimeStep(funcIt->second.outputFunction, funcIt->first,
-                                        funcIt->second.requiredGlobalStates, funcIt->second.incompatibleGlobalStates);
-      }
-   }
-};
-
 /////////////////////
 /// Main Function ///
 /////////////////////
@@ -171,15 +130,14 @@ int main(int argc, char** argv)
    BlockDataID pdfFieldId  = field::addToStorage< PdfField_T >(blocks, "pdf field", real_c(0.0), field::fzyx);
    BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
 
-   // First, set up the initial shear flow in the velocity field...
-
+   // Velocity field setup
    auto shearFlowSetup = walberlaEnv.config()->getOneBlock("ShearFlowSetup");
    initShearFlowVelocityField(blocks, velocityFieldId, shearFlowSetup);
 
-   real_t rho = shearFlowSetup.getParameter("rho", real_c(1.8));
+   real_t rho = shearFlowSetup.getParameter("rho", real_c(1.0));
 
-   // ... and then use the generated PDF setter to initialize the PDF field.
-   pystencils::DensityAndVelocityFieldSetter pdfSetter(pdfFieldId, velocityFieldId, rho);
+   // pdfs setup
+   pystencils::InitialPDFsSetter pdfSetter(pdfFieldId, velocityFieldId, rho);
 
    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
    {
@@ -227,23 +185,16 @@ int main(int argc, char** argv)
    int vtkWriteFrequency = 100;
    if (vtkWriteFrequency > 0)
    {
-      const std::string path = "vtk_out";
-      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, path,
-                                                      "simulation_step", false, true, true, false, 0);
+      const std::string path = "vtk_out/tut03";
+      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "cumulant_mrt_velocity_field", vtkWriteFrequency, 0,
+                                                      false, path, "simulation_step", false, true, true, false, 0);
 
-
-      auto velWriter = make_shared<field::VTKWriter<VectorField_T>>(velocityFieldId, "Velocity");
+      auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velocityFieldId, "Velocity");
       vtkOutput->addCellDataWriter(velWriter);
 
       timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
-
-
    }
 
-   // VTK Output
-   // VTKOutputSetup vtkOutput(velocityFieldId, flagFieldId, fluidFlagUID);
-   // vtkOutput.initializeAndAdd(timeloop, blocks, walberlaEnv.config());
-
    timeloop.run();
 
    return EXIT_SUCCESS;
diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.dox b/apps/tutorials/codegen/03_AdvancedLBMCodegen.dox
new file mode 100644
index 0000000000000000000000000000000000000000..59afa276e9b13728ac1e4659b5b002ae049c54c2
--- /dev/null
+++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.dox
@@ -0,0 +1,233 @@
+namespace walberla{
+
+/**
+\page tutorial_codegen03 Tutorial - Code Generation 3: Advanced LBM Code Generation
+
+\section overview Overview
+
+This tutorial demonstrates how to use [pystencils](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and [lbmpy](https://pycodegen.pages.i10git.cs.fau.de/lbmpy) to their full potential for generating highly optimized and hardware-specific Lattice Boltzmann simulation code within the waLBerla framework. Other than in \ref tutorial_codegen02, we will be generating a full LBM sweep instead of a lattice model class. Furthermore, we will generate a communication pack info class and a sweep for setting the initial densities and velocities of the LBM field. A hardware-specific implementation of a NoSlip boundary handler will also be generated. Those components will then be combined in a waLBerla application for simulating the same shear flow scenario as in the previous tutorial.
+
+In this tutorial, we will be using the more advanced cumulant-based multiple-relaxation-time (MRT) collision operator. Instead of relaxing the entire distribution functions toward their equilibrium values, their [cumulants](https://en.wikipedia.org/wiki/Cumulant) are relaxed with individual relaxation rates. We will also be using the D2Q9 velocity set. For this velocity set, the zeroth- and first-order cumulants correspond to density and momentum which are conserved during collisions, so their relaxation rates can be set to zero. We will only specify one common relaxation rate $\omega$ for the three second-order cumulants; the higher-order cumulants will be set to their equilibrium values which corresponds to a relaxation rate of 1.
+
+\section advancedlbmcodegen_python Code Generation in Python
+
+We start off by defining some general parameters as well as [SymPy](https://www.sympy.org/) symbols and pystencils fields which we will need later. Those include the stencil (`D2Q9`), the memory layout (`fzyx`, see \ref tutorial_codegen01) and the symbol $\omega$ for the relaxation rate of the second-order cumulants.
+
+For the stream-pull-collide type kernel we will be implementing, two pdf fields are required which we set up using pystencils. We will use the built-in field swapping functionality of `generate_sweep` to swap the temporary destination field with the source field after each time step. This requires us to obey a certain naming convention: The temporary field *must* have the same *field name* as the original field, but with a suffix `_tmp`. Otherwise, an error will occur during compilation. This does not mean the name of the python variable, but the name passed to `ps.fields`.
+
+For VTK output and the initial velocity setup, we define a velocity vector field as an output field for the LB method. Such an output field can also be used for the coupling of methods which require the macroscopic velocity, such as the phase-field model for simulating multicomponent flows.
+
+\code
+stencil = 'D2Q9'
+omega = sp.Symbol('omega')
+layout = 'fzyx'
+
+#   PDF Fields
+pdfs, pdfs_tmp = ps.fields('pdfs(9), pdfs_tmp(9): [2D]', layout=layout)
+
+#   Velocity Output Field
+velocity = ps.fields("velocity(2): [2D]", layout=layout)
+output = {'velocity': velocity}
+
+#   Optimization
+optimization = {'target': 'cpu',
+                'cse_global': True,
+                'symbolic_field': pdfs,
+                'symbolic_temporary_field': pdfs_tmp,
+                'field_layout': layout}
+\endcode
+
+We set up the cumulant-based MRT method with relaxation rates as described above. We use `generate_lb_update_rule` from lbmpy to derive the set of equations describing the collision operator together with the *pull* streaming pattern. These equations define the entire LBM sweep.
+
+\code
+lbm_params = {'stencil': stencil,
+              'method': 'mrt_raw',
+              'relaxation_rates': [0, 0, 0, omega, omega, omega, 1, 1, 1],
+              'cumulant': True,
+              'compressible': True}
+
+lbm_update_rule = create_lb_update_rule(optimization=optimization,
+                                        output=output,
+                                        **lbm_params)
+
+lbm_method = lbm_update_rule.method
+\endcode
+
+In \ref tutorial_codegen02, we were able to use the framework built around the waLBerla lattice model template API for setting up the shear flow's initial velocity profile. Since we are not using a lattice model class this time, this API is not available to us. With lbmpy, though, we can generate a kernel which takes in scalar values and/or fields for the initial density and velocity, and sets the initial PDF values to the corresponding equilibrium. The function `macroscopic_values_setter` from `lbmpy.macroscopic_value_kernels` returns a set of assignments for this initialization procedure. It takes the LB method definition as an argument, as well as either symbols or pystencils field accesses for the initial density `rho` and the initial velocity. Lastly, it takes the pdf field's center vector as the destination for the pdf values. We define a separate symbol for the density and use the velocity field defined above.
+
+\code
+initial_rho = sp.Symbol('rho_0')
+
+pdfs_setter = macroscopic_values_setter(lbm_method,
+                                        initial_rho,
+                                        velocity.center_vector,
+                                        pdfs.center_vector)
+\endcode
+
+Everything is now prepared to generate the actual C++ code. We create the code generation context and call several functions from `pystencils_walberla` and `lbmpy_walberla`:
+
+- The LBM sweep is generated from the `lbm_update_rule` equations using `generate_sweep`. This function takes an additional parameter `field_swaps` which takes a list of pairs of fields. Each of these pairs consists of a source and a destination (or temporary) field which shall be swapped after the sweep is completed. 
+- The communication pack info is generated using `generate_pack_info_from_kernel` which infers from the update rule's write accesses the pdf indices that need to be communicated. Without further specification, it assumes a pull-type kernel.
+- The pdf initialization kernel is generated from the `pdfs_setter` assignment collection using `generate_sweep`.
+- Using `generate_boundary`, we generate an optimized implementation of a NoSlip boundary handler for the domain's walls. 
+
+All implementations generated this way will be optimized for the hardware targets specified in the waLberla build configuration. If, for example, CUDA was enabled and the hardware target set to `gpu` above, a highly efficient CUDA implementation of every class involved would be created for running the simulation on a graphics card.
+
+\code 
+with CodeGeneration() as ctx:
+
+    #   LBM Sweep
+    generate_sweep(ctx, "CumulantMRTSweep", lbm_update_rule, field_swaps=[(pdfs, pdfs_tmp)])
+
+    #   Pack Info
+    generate_pack_info_from_kernel(ctx, "CumulantMRTPackInfo", lbm_update_rule)
+
+    #   Macroscopic Values Setter
+    generate_sweep(ctx, "DensityAndVelocityFieldSetter", pdfs_setter)
+
+    #   NoSlip Boundary
+    generate_boundary(ctx, "CumulantMRTNoSlip", NoSlip(), lbm_method)
+\endcode
+
+As in \ref tutorial_codegen02, the classes generated by above code need to be registered with CMake using the `walberla_generate_target_from_python` macro.
+
+\section advancedlbmcodegen_application The waLBerla application
+
+We will now integrate the generated classes into a waLBerla application. After adding the code generation target as a CMake dependency, we can include their header files:
+
+\code
+#include "CumulantMRTNoSlip.h"
+#include "CumulantMRTPackInfo.h"
+#include "CumulantMRTSweep.h"
+#include "DensityAndVelocityFieldSetter.h"
+\endcode
+
+We set up typedef aliases for the generated pack info and the D2Q9 stencil. For the PDF and velocity fields we use instances of the field::GhostLayerField template. The pdf field has nine entries per spatial position, as specified by the `Stencil_T::Size` parameter. As our domain is two-dimensional, the velocity at each lattice node is a two-dimensional vector. Thus, we set up the velocity field to have two index dimensions by passing the stencil's dimension as a template parameter. Finally, we also define a typedef alias for our generated NoSlip boundary.
+
+\code
+// Communication Pack Info
+typedef pystencils::CumulantMRTPackInfo PackInfo_T;
+
+// LB Method Stencil
+typedef stencil::D2Q9 Stencil_T;
+
+// PDF field type
+typedef field::GhostLayerField< real_t, Stencil_T::Size > PdfField_T;
+
+// Velocity Field Type
+typedef field::GhostLayerField< real_t, Stencil_T::D > VectorField_T;
+
+// Boundary Handling
+typedef walberla::uint8_t flag_t;
+typedef FlagField< flag_t > FlagField_T;
+typedef lbm::CumulantMRTNoSlip NoSlip_T;
+\endcode
+
+The pdf field, the velocity field and the flag field which stores information about the cell types for boundary handling are set up using field::addToStorage. The memory layout `fzyx` is explicitly set for the pdf and velocity fields. We set initial values of `0.0` for both of them since they will be explicitly initialized later on.
+
+\subsection advancedlbmpy_app_sweep_and_packinfo Using the generated sweep and pack info
+
+The generated pack info class can be used in exactly the same way as a waLBerla-native pack info. It simply needs to be registered with the communication scheme by calling `addPackInfo`:
+
+\code
+   blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
+   communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldId));
+\endcode
+
+Upon construction, the generated sweep class requires the pdf field, the velocity field and the relaxation rate `omega`. The parameter `omega` is read from a parameter file. After creation, the sweep functor can be passed directly to the timeloop.
+
+\code
+   timeloop.add() << Sweep(pystencils::CumulantMRTSweep(pdfFieldId, velocityFieldId, omega));
+\endcode
+
+\subsection advancedlbmpy_app_boundary Adding the NoSlip Boundary
+
+The generated NoSlip boundary works independent of the native waLBerla LBM boundary handling classes, which are templated for the use with lattice models. Still, we use the functions exposed by the geometry module to initialize the flag field from the parameter file. The NoSlip boundary handler is constructed by passing the block grid and the pdf field's identifier. After the flag field has been set up, the `fillFromFlagField` method is called to register the boundary cells with the boundary handler.
+
+\code
+   const FlagUID fluidFlagUID("Fluid");
+
+   auto boundariesConfig = walberlaEnv.config()->getOneBlock("Boundaries");
+
+   NoSlip_T noSlip(blocks, pdfFieldId);
+
+   geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
+
+   noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID);
+\endcode
+
+The generated boundary handler holds an internal list of tuples `(x, y, dir)`, the so called index vector. Each entry corresponds to a fluid cell which is a direct neighbour of a boundary cell. For each of these fluid cells, their position `x, y` is saved as well as the direction pointing toward the boundary cell. In our scenario, the north and south domain borders are marked as NoSlip-boundaries. The boundary cells are located on the northern and southern ghost layers, so the top and bottom row of fluid cells are neighbours to these boundaries. Thus, in the index vector, for all cells of the bottom row the tuples `(x, 0, 2)`, `(x, 0, 7)` and `(x, 0, 8)` are saved. The indices 2, 7 and 8 correspond to the South, South-East and South-West directions of the D2Q9 lattice, respectively. The boundary sweep then runs over this index vector, reflecting the outgoing populations in the stored directions off the wall according to the NoSlip rule.
+
+The NoSlip sweep can now be added to the timeloop before the LBM sweep. We also register the communication functor as a BeforeFunction of the boundary sweep, since it needs to be run first in every time step.
+
+\code
+   timeloop.add() << BeforeFunction(communication, "communication") << Sweep(noSlip);
+\endcode
+
+\subsection advancedlbmpy_velinit Initialization of the Velocity Profile
+
+Setting up the shear flow's initial velocity profile consists of two parts. First, the velocity field is initialized to the shear flow. Second, the prepared velocity field is passed to the generated pdf field setter which then initializes the equilibrium pdf values from the velocities. The density is uniformly set to unity.
+
+The `initShearFlowVelocityField` function takes a pointer to the block storage, the velocity field's identifier and a handle to a parameter file block. It then reads the magnitudes of the velocity's $x$-component and the perturbation in the $y$-direction. For the random noise, a random number generator is created with a seed specified in the parameter file. Then, the function iterates over all blocks of the current process, and over all cells of these blocks. A cell's velocity is then set according to its global position.
+
+\code
+void initShearFlowVelocityField(const shared_ptr< StructuredBlockForest >& blocks, 
+                                const BlockDataID& velocityFieldId,
+                                const Config::BlockHandle& config)
+{
+   math::RealRandom< real_t > rng(config.getParameter< std::mt19937::result_type >("noiseSeed", 42));
+
+   real_t velocityMagnitude = config.getParameter< real_t >("velocityMagnitude", real_c(0.08));
+   real_t noiseMagnitude    = config.getParameter< real_t >("noiseMagnitude", real_c(0.1) * velocityMagnitude);
+
+   real_t n_y = real_c(blocks->getNumberOfYCells());
+
+   for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      auto u = (*blockIt).getData< VectorField_T >(velocityFieldId);
+
+      for (auto cellIt = u->beginWithGhostLayerXYZ(); cellIt != u->end(); ++cellIt)
+      {
+         Cell globalCell(cellIt.cell());
+         blocks->transformBlockLocalToGlobalCell(globalCell, *blockIt);
+
+         real_t relative_y = real_c(globalCell.y()) / n_y;
+
+         u->get(cellIt.cell(), 0) = relative_y < 0.3 || relative_y > 0.7 ? velocityMagnitude : -velocityMagnitude;
+
+         u->get(cellIt.cell(), 1) = noiseMagnitude * rng();
+      }
+   }
+}
+\endcode
+
+After the velocity field has been initialized, the generated `InitialPDFsSetter` sweep is created and run once on each block. After this step, the pdf field will be initialized with the correct equilibrium distributions for the first iteration.
+
+\code
+   // Velocity field setup
+   auto shearFlowSetup = walberlaEnv.config()->getOneBlock("ShearFlowSetup");
+   initShearFlowVelocityField(blocks, velocityFieldId, shearFlowSetup);
+
+   real_t rho = shearFlowSetup.getParameter("rho", real_c(1.0));
+
+   // pdfs setup
+   pystencils::InitialPDFsSetter pdfSetter(pdfFieldId, velocityFieldId, rho);
+
+   for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
+   {
+      pdfSetter(&(*blockIt));
+   }
+\endcode
+
+The simulation is now ready to be run.
+
+\section advancedlbmpy_conclusion Conclusion and Outlook
+
+We have now successfully implemented a waLBerla LBM simulation application with an advanced collision operator, which can be specialized for any hardware target. This is still just a glimpse of the capabilities of code generation. One possible extension would be the use of advanced streaming patterns like the AA-pattern or EsoTwist to reduce the simulation's memory footprint. Also, lbmpy gives us the tools to develop advanced lattice boltzmann methods for many kinds of applications. The basic principles demonstrated in these tutorials can thus be used for creating much more complicated simulations with specially tailored, optimized lattice boltzmann code.
+
+\tableofcontents
+
+*/
+
+}
\ No newline at end of file
diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.prm b/apps/tutorials/codegen/03_AdvancedLBMCodegen.prm
index 3b78cc083aaf687153cd95b36132c47d65c2de11..b194b4149f3013143b16a71d6bc6e7a9be459fa3 100644
--- a/apps/tutorials/codegen/03_AdvancedLBMCodegen.prm
+++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.prm
@@ -2,7 +2,7 @@
 Parameters 
 {
 	omega           1.8;
-	timesteps       1000;
+	timesteps       100000;
 
 	remainingTimeLoggerFrequency 3; // in seconds
 }
diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.py b/apps/tutorials/codegen/03_AdvancedLBMCodegen.py
index e3113c92c5d8a5a9e5350467d9a77e3aa234dcc4..302afa4147866fe1970257ef81f93033bf0d65e0 100644
--- a/apps/tutorials/codegen/03_AdvancedLBMCodegen.py
+++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.py
@@ -13,38 +13,38 @@ from lbmpy_walberla import generate_boundary
 #      General Parameters
 #   ========================
 
-STENCIL = 'D2Q9'
-OMEGA = sp.Symbol('omega')
-LAYOUT = 'fzyx'
+stencil = 'D2Q9'
+omega = sp.Symbol('omega')
+layout = 'fzyx'
 
 #   PDF Fields
-pdfs, pdfs_tmp = ps.fields('pdfs(9), pdfs_tmp(9): [2D]', layout=LAYOUT)
+pdfs, pdfs_tmp = ps.fields('pdfs(9), pdfs_tmp(9): [2D]', layout=layout)
 
 #   Velocity Output Field
-velocity = ps.fields("velocity(2): [2D]", layout=LAYOUT)
-OUTPUT = {'velocity': velocity}
+velocity = ps.fields("velocity(2): [2D]", layout=layout)
+output = {'velocity': velocity}
 
 #   Optimization
-OPT = {'target': 'cpu',
-       'cse_global': True,
-       'symbolic_field': pdfs,
-       'symbolic_temporary_field': pdfs_tmp,
-       'field_layout': LAYOUT}
+optimization = {'target': 'cpu',
+                'cse_global': True,
+                'symbolic_field': pdfs,
+                'symbolic_temporary_field': pdfs_tmp,
+                'field_layout': layout}
 
 
 #   ==================
 #      Method Setup
 #   ==================
 
-lbm__params = {'stencil': STENCIL,
-               'method': 'mrt_raw',
-               'relaxation_rates': [0, 0, 0, OMEGA, OMEGA, OMEGA, 1, 1, 1],
-               'cumulant': True,
-               'compressible': True}
+lbm_params = {'stencil': stencil,
+              'method': 'mrt_raw',
+              'relaxation_rates': [0, 0, 0, omega, omega, omega, 1, 1, 1],
+              'cumulant': True,
+              'compressible': True}
 
-lbm_update_rule = create_lb_update_rule(optimization=OPT,
-                                        output=OUTPUT,
-                                        **lbm__params)
+lbm_update_rule = create_lb_update_rule(optimization=optimization,
+                                        output=output,
+                                        **lbm_params)
 
 lbm_method = lbm_update_rule.method
 
@@ -54,9 +54,9 @@ lbm_method = lbm_update_rule.method
 
 initial_rho = sp.Symbol('rho_0')
 
-pdfs_setter = macroscopic_values_setter(lbm_method, 
-                                        initial_rho, 
-                                        velocity.center_vector, 
+pdfs_setter = macroscopic_values_setter(lbm_method,
+                                        initial_rho,
+                                        velocity.center_vector,
                                         pdfs.center_vector)
 
 #   =====================
@@ -72,7 +72,7 @@ with CodeGeneration() as ctx:
     generate_pack_info_from_kernel(ctx, "CumulantMRTPackInfo", lbm_update_rule)
 
     #   Macroscopic Values Setter
-    generate_sweep(ctx, "DensityAndVelocityFieldSetter", pdfs_setter)
+    generate_sweep(ctx, "InitialPDFsSetter", pdfs_setter)
 
     #   NoSlip Boundary
     generate_boundary(ctx, "CumulantMRTNoSlip", NoSlip(), lbm_method)
diff --git a/apps/tutorials/codegen/CMakeLists.txt b/apps/tutorials/codegen/CMakeLists.txt
index cc3a2939e400ae6121673ac042b70342ee49a1f5..c9eb292d38b1b0aeefcd522df1092ca3cc27e80c 100644
--- a/apps/tutorials/codegen/CMakeLists.txt
+++ b/apps/tutorials/codegen/CMakeLists.txt
@@ -24,12 +24,11 @@ if( WALBERLA_BUILD_WITH_CODEGEN )
                     DEPENDS blockforest core domain_decomposition field geometry timeloop lbm stencil vtk 02_LBMLatticeModelGenerationPython )
 
     #   Tutorial 3: Advanced lbmpy Code Generation
-
     walberla_generate_target_from_python( NAME 03_AdvancedLBMCodegenPython
         FILE 03_AdvancedLBMCodegen.py
         OUT_FILES   CumulantMRTSweep.cpp CumulantMRTSweep.h
                     CumulantMRTPackInfo.cpp CumulantMRTPackInfo.h
-                    DensityAndVelocityFieldSetter.cpp DensityAndVelocityFieldSetter.h
+                    InitialPDFsSetter.cpp InitialPDFsSetter.h
                     CumulantMRTNoSlip.cpp CumulantMRTNoSlip.h)
 
     walberla_add_executable ( NAME 03_AdvancedLBMCodegenApp