From 58d371d70bd4652c384fe81fee8a884012139870 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Fri, 12 Feb 2021 18:01:07 +0100
Subject: [PATCH] Generated outflow bc

---
 AUTHORS.txt                                   |   1 +
 .../UniformGridGPU/UniformGridGPU.py          |   5 +-
 .../UniformGridGenerated.py                   |   5 +-
 apps/pythonmodule/PythonModule.cpp            |   2 +
 .../PhaseFieldAllenCahn/CPU/CMakeLists.txt    |   3 +-
 .../PhaseFieldAllenCahn/GPU/CMakeLists.txt    |   3 +-
 .../codegen/03_AdvancedLBMCodegen.py          |   5 +-
 python/lbmpy_walberla/__init__.py             |   3 +-
 .../lbmpy_walberla/additional_data_handler.py | 127 +++++++++++
 python/lbmpy_walberla/boundary.py             |  30 ++-
 python/lbmpy_walberla/packinfo.py             |  82 +++++++
 .../lbmpy_walberla/walberla_lbm_generation.py |   1 -
 .../additional_data_handler.py                |  55 +++++
 python/pystencils_walberla/boundary.py        |  73 +++---
 python/pystencils_walberla/codegen.py         |  15 +-
 python/pystencils_walberla/jinja_filters.py   |  31 +++
 .../templates/Boundary.tmpl.cpp               |  22 +-
 .../templates/Boundary.tmpl.h                 |  62 +++--
 .../templates/Sweep.tmpl.cpp                  |   2 +-
 src/lbm/field/QCriterion.h                    |  59 ++---
 src/timeloop/SweepTimeloop.cpp                |  10 +-
 tests/lbm/CMakeLists.txt                      |  26 ++-
 tests/lbm/codegen/CMakeLists.txt              |   8 +
 tests/lbm/codegen/GeneratedOutflowBC.cpp      | 194 ++++++++++++++++
 tests/lbm/codegen/GeneratedOutflowBC.prm      |  24 ++
 tests/lbm/codegen/GeneratedOutflowBC.py       |  92 ++++++++
 .../lbm/codegen/LbmPackInfoGenerationTest.py  |  29 +++
 tests/lbm/diff_packinfos.sh                   |   6 +
 tests/timeloop/CMakeLists.txt                 |   4 +-
 tests/timeloop/TimeloopAndSweepRegister.cpp   | 212 +++++++-----------
 30 files changed, 945 insertions(+), 246 deletions(-)
 create mode 100644 python/lbmpy_walberla/additional_data_handler.py
 create mode 100644 python/lbmpy_walberla/packinfo.py
 create mode 100644 python/pystencils_walberla/additional_data_handler.py
 create mode 100644 tests/lbm/codegen/CMakeLists.txt
 create mode 100644 tests/lbm/codegen/GeneratedOutflowBC.cpp
 create mode 100644 tests/lbm/codegen/GeneratedOutflowBC.prm
 create mode 100644 tests/lbm/codegen/GeneratedOutflowBC.py
 create mode 100644 tests/lbm/codegen/LbmPackInfoGenerationTest.py
 create mode 100755 tests/lbm/diff_packinfos.sh

diff --git a/AUTHORS.txt b/AUTHORS.txt
index b0dba6103..2f956f1b1 100644
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -13,6 +13,7 @@ Dominik Bartuschat
 Ehsan Fattahi
 Felix Winterhalter
 Florian Schornbaum
+Frederik Hennig
 Grigorii Drozdov
 Helen Schottenhamml
 Igor Ostanin
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
index c1371278a..1a7973f6d 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -69,11 +69,10 @@ options_dict = {
         'relaxation_rate': omega,
     },
     'cumulant': {
+        'method': 'cumulant',
         'stencil': 'D3Q19',
         'compressible': True,
-        'method': 'mrt',
-        'cumulant': True,
-        'relaxation_rates': [0, omega, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        'relaxation_rate': omega,
     },
 }
 
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
index b9ca24ba1..fc587c445 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
@@ -55,11 +55,10 @@ options_dict = {
         'relaxation_rate': omega,
     },
     'cumulant': {
+        'method': 'cumulant',
         'stencil': 'D3Q19',
         'compressible': True,
-        'method': 'mrt',
-        'cumulant': True,
-        'relaxation_rates': [0, omega, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        'relaxation_rate': omega,
     },
 }
 
diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp
index b32f103dd..3059e3f05 100644
--- a/apps/pythonmodule/PythonModule.cpp
+++ b/apps/pythonmodule/PythonModule.cpp
@@ -40,11 +40,13 @@ using namespace walberla;
    Field<walberla::real_t,1>,\
    Field<walberla::real_t,2>,\
    Field<walberla::real_t,3>,\
+   Field<walberla::real_t,4>,\
    Field<walberla::real_t,9>,\
    Field<walberla::real_t,15>,\
    Field<walberla::real_t,19>,\
    Field<walberla::real_t,27>,\
    Field<walberla::int8_t,1>,\
+   Field<walberla::int32_t,1>,\
    Field<walberla::int64_t,1>,\
    Field<walberla::int64_t,2>,\
    Field<walberla::int64_t,3>,\
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt
index 0e8f653f7..adacc3263 100644
--- a/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt
@@ -17,5 +17,4 @@ waLBerla_add_executable(NAME multiphaseCPU
         FILES multiphase.cpp PythonExports.cpp InitializerFunctions.cpp contact.cpp CalculateNormals.cpp multiphase_codegen.py
         DEPENDS blockforest core field postprocessing lbm geometry timeloop gui PhaseFieldCodeGenCPU)
 
-
-
+set_target_properties(multiphaseCPU PROPERTIES CXX_VISIBILITY_PRESET hidden)
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
index c65a87749..51c692203 100644
--- a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
@@ -20,5 +20,4 @@ waLBerla_add_executable(NAME multiphaseGPU
         FILES multiphase.cpp PythonExports.cpp InitializerFunctions.cpp CalculateNormals.cpp contact.cu multiphase_codegen.py
         DEPENDS blockforest core cuda field postprocessing lbm geometry timeloop gui PhaseFieldCodeGenGPU)
 
-
-
+set_target_properties(multiphaseGPU PROPERTIES CXX_VISIBILITY_PRESET hidden)
diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.py b/apps/tutorials/codegen/03_AdvancedLBMCodegen.py
index a39790980..65a0602f2 100644
--- a/apps/tutorials/codegen/03_AdvancedLBMCodegen.py
+++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.py
@@ -36,9 +36,8 @@ optimization = {'cse_global': True,
 #   ==================
 
 lbm_params = {'stencil': stencil,
-              'method': 'mrt_raw',
-              'relaxation_rates': [0, 0, 0, omega, omega, omega, 1, 1, 1],
-              'cumulant': True,
+              'method': 'cumulant',
+              'relaxation_rate': omega,
               'compressible': True}
 
 lbm_update_rule = create_lb_update_rule(optimization=optimization,
diff --git a/python/lbmpy_walberla/__init__.py b/python/lbmpy_walberla/__init__.py
index f7b022ab0..1a7b136a7 100644
--- a/python/lbmpy_walberla/__init__.py
+++ b/python/lbmpy_walberla/__init__.py
@@ -1,4 +1,5 @@
 from .boundary import generate_boundary
 from .walberla_lbm_generation import RefinementScaling, generate_lattice_model
+from .packinfo import generate_lb_pack_info
 
-__all__ = ['generate_lattice_model', 'RefinementScaling', 'generate_boundary']
+__all__ = ['generate_lattice_model', 'RefinementScaling', 'generate_boundary', 'generate_lb_pack_info']
diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py
new file mode 100644
index 000000000..804c4953e
--- /dev/null
+++ b/python/lbmpy_walberla/additional_data_handler.py
@@ -0,0 +1,127 @@
+from pystencils.stencil import inverse_direction
+
+from lbmpy.advanced_streaming import AccessPdfValues, numeric_offsets, numeric_index
+from lbmpy.boundaries import ExtrapolationOutflow, UBB
+
+from pystencils_walberla.additional_data_handler import AdditionalDataHandler
+
+
+class UBBAdditionalDataHandler(AdditionalDataHandler):
+    def __init__(self, stencil, boundary_object):
+        assert isinstance(boundary_object, UBB)
+        self._boundary_object = boundary_object
+        super(UBBAdditionalDataHandler, self).__init__(stencil=stencil)
+
+    @property
+    def constructor_arguments(self):
+        return ", std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
+               "velocityCallback "
+
+    @property
+    def initialiser_list(self):
+        return "elementInitaliser(velocityCallback),"
+
+    @property
+    def additional_arguments_for_fill_function(self):
+        return "blocks, "
+
+    @property
+    def additional_parameters_for_fill_function(self):
+        return " const shared_ptr<StructuredBlockForest> &blocks, "
+
+    def data_initialisation(self, direction):
+        init_list = ["Vector3<real_t> InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), "
+                     "blocks, *block);", "element.vel_0 = InitialisatonAdditionalData[0];",
+                     "element.vel_1 = InitialisatonAdditionalData[1];"]
+        if self._dim == 3:
+            init_list.append("element.vel_2 = InitialisatonAdditionalData[2];")
+
+        return "\n".join(init_list)
+
+    @property
+    def additional_member_variable(self):
+        return "std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)> " \
+               "elementInitaliser; "
+
+
+class OutflowAdditionalDataHandler(AdditionalDataHandler):
+    def __init__(self, stencil, boundary_object, target='cpu', field_name='pdfs'):
+        assert isinstance(boundary_object, ExtrapolationOutflow)
+        self._boundary_object = boundary_object
+        self._stencil = boundary_object.stencil
+        self._lb_method = boundary_object.lb_method
+        self._normal_direction = boundary_object.normal_direction
+        self._field_name = field_name
+        self._target = target
+        super(OutflowAdditionalDataHandler, self).__init__(stencil=stencil)
+
+        assert sum([a != 0 for a in self._normal_direction]) == 1,\
+            "The outflow boundary is only implemented for straight walls at the moment."
+
+    @property
+    def constructor_arguments(self):
+        return f", BlockDataID {self._field_name}CPUID_" if self._target == 'gpu' else ""
+
+    @property
+    def initialiser_list(self):
+        return f"{self._field_name}CPUID({self._field_name}CPUID_)," if self._target == 'gpu' else ""
+
+    @property
+    def additional_field_data(self):
+        identifier = "CPU" if self._target == "gpu" else ""
+        return f"auto {self._field_name} = block->getData< field::GhostLayerField<double, " \
+               f"{len(self._stencil)}> >({self._field_name}{identifier}ID); "
+
+    def data_initialisation(self, direction_index):
+        pdf_acc = AccessPdfValues(self._boundary_object.stencil,
+                                  streaming_pattern=self._boundary_object.streaming_pattern,
+                                  timestep=self._boundary_object.zeroth_timestep,
+                                  streaming_dir='out')
+
+        init_list = []
+        for key, value in self.get_init_dict(pdf_acc, direction_index).items():
+            init_list.append(f"element.{key} = {self._field_name}->get({value});")
+
+        return "\n".join(init_list)
+
+    @property
+    def additional_member_variable(self):
+        return f"BlockDataID {self._field_name}CPUID;"
+
+    @property
+    def stencil_info(self):
+        stencil_info = []
+        for i, d in enumerate(self._stencil):
+            if any([a != 0 and b != 0 and a == b for a, b in zip(self._normal_direction, d)]):
+                direction = d if self._dim == 3 else d + (0,)
+                stencil_info.append((i, direction, ", ".join([str(e) for e in direction])))
+        return stencil_info
+
+    def get_init_dict(self, pdf_accessor, direction_index):
+        """The Extrapolation Outflow boundary needs additional data. This function provides a list of all values
+        which have to be initialised"""
+        position = ["it.x()", "it.y()", "it.z()"]
+        direction = self._stencil[direction_index]
+        inv_dir = self._stencil.index(inverse_direction(direction))
+
+        tangential_offset = tuple(offset - normal for offset, normal in zip(direction, self._normal_direction))
+
+        result = {}
+        pos = []
+        offsets = numeric_offsets(pdf_accessor.accs[inv_dir])
+        for p, o, t in zip(position, offsets, tangential_offset):
+            pos.append(p + f" + cell_idx_c({str(o + t)})")
+        if self._dim == 2:
+            pos.append("0")
+        pos.append(str(numeric_index(pdf_accessor.accs[inv_dir])[0]))
+        result[f'pdf'] = ', '.join(pos)
+
+        pos = []
+        for p, o, t in zip(position, offsets, tangential_offset):
+            pos.append(p + f" + cell_idx_c({str(o + t)})")
+        if self._dim == 2:
+            pos.append("0")
+        pos.append(str(numeric_index(pdf_accessor.accs[inv_dir])[0]))
+        result[f'pdf_nd'] = ', '.join(pos)
+
+        return result
diff --git a/python/lbmpy_walberla/boundary.py b/python/lbmpy_walberla/boundary.py
index 8f751f529..b0dafb7b3 100644
--- a/python/lbmpy_walberla/boundary.py
+++ b/python/lbmpy_walberla/boundary.py
@@ -1,5 +1,6 @@
 import pystencils_walberla.boundary
 from lbmpy.boundaries.boundaryhandling import create_lattice_boltzmann_boundary_kernel
+from lbmpy.advanced_streaming import Timestep, is_inplace
 
 
 def generate_boundary(generation_context,
@@ -7,15 +8,29 @@ def generate_boundary(generation_context,
                       boundary_object,
                       lb_method,
                       field_name='pdfs',
+                      streaming_pattern='pull',
+                      always_generate_separate_classes=False,
+                      additional_data_handler=None,
                       **create_kernel_params):
-
     def boundary_creation_function(field, index_field, stencil, boundary_functor, target='cpu', openmp=True, **kwargs):
-        return create_lattice_boltzmann_boundary_kernel(field,
-                                                        index_field,
-                                                        lb_method,
-                                                        boundary_functor,
-                                                        target=target,
-                                                        **kwargs)
+        pargs = (field, index_field, lb_method, boundary_functor)
+        kwargs = {'target': target, **kwargs}
+        if is_inplace(streaming_pattern) or always_generate_separate_classes:
+            return {
+                'EvenSweep': create_lattice_boltzmann_boundary_kernel(*pargs,
+                                                                      streaming_pattern=streaming_pattern,
+                                                                      prev_timestep=Timestep.EVEN,
+                                                                      **kwargs),
+                'OddSweep': create_lattice_boltzmann_boundary_kernel(*pargs,
+                                                                     streaming_pattern=streaming_pattern,
+                                                                     prev_timestep=Timestep.ODD,
+                                                                     **kwargs)
+            }
+        else:
+            return create_lattice_boltzmann_boundary_kernel(*pargs,
+                                                            streaming_pattern=streaming_pattern,
+                                                            prev_timestep=Timestep.BOTH,
+                                                            **kwargs)
 
     pystencils_walberla.boundary.generate_boundary(generation_context,
                                                    class_name,
@@ -25,4 +40,5 @@ def generate_boundary(generation_context,
                                                    index_shape=[len(lb_method.stencil)],
                                                    kernel_creation_function=boundary_creation_function,
                                                    namespace='lbm',
+                                                   additional_data_handler=additional_data_handler,
                                                    **create_kernel_params)
diff --git a/python/lbmpy_walberla/packinfo.py b/python/lbmpy_walberla/packinfo.py
new file mode 100644
index 000000000..54a4e6755
--- /dev/null
+++ b/python/lbmpy_walberla/packinfo.py
@@ -0,0 +1,82 @@
+from collections import defaultdict
+from lbmpy.advanced_streaming.utility import Timestep, get_accessor, get_timesteps
+from lbmpy.advanced_streaming.communication import _extend_dir
+from pystencils.stencil import inverse_direction
+from pystencils_walberla.codegen import comm_directions, generate_pack_info
+from pystencils import Assignment, Field
+
+
+def generate_lb_pack_info(generation_context,
+                          class_name_prefix: str,
+                          stencil,
+                          pdf_field,
+                          streaming_pattern='pull',
+                          lb_collision_rule=None,
+                          always_generate_separate_classes=False,
+                          namespace='lbm',
+                          **create_kernel_params):
+    """Generates waLBerla MPI PackInfos for an LBM kernel, based on a given method
+    and streaming pattern. For in-place streaming patterns, two PackInfos are generated;
+    one for the even and another for the odd time steps.
+
+    Args:
+        generation_context: see documentation of `generate_sweep`
+        class_name_prefix: Prefix of the desired class name which will be extended with
+                           'Even' or 'Odd' for in-place kernels
+        stencil: The tuple of directions specifying the employed LB stencil.
+        pdf_field: pdf field for which the pack info is created
+        streaming_pattern: The employed streaming pattern.
+        lb_collision_rule: Optional. The collision rule defining the lattice boltzmann kernel, as returned
+                           by `create_lb_collision_rule`. If specified, it will be scanned for non-local
+                           accesses to other fields other than the PDF fields (as might be required for
+                           computing gradients in coupled simulations), whose communication will then
+                           be included in the PackInfo.
+        always_generate_separate_classes: If True, generate a pair of Even/Odd PackInfos even for a two-
+                                          fields kernel (i.e. the pull/push patterns). Otherwise, for two-fields
+                                          kernels, only one PackInfo class will be generated without a
+                                          suffix to its name.
+        namespace: inner namespace of the generated class
+        **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
+    """
+    timesteps = [Timestep.EVEN, Timestep.ODD] \
+        if always_generate_separate_classes \
+        else get_timesteps(streaming_pattern)
+
+    common_spec = defaultdict(set)
+
+    if lb_collision_rule is not None:
+        assignments = lb_collision_rule.all_assignments
+        reads = set()
+        for a in assignments:
+            if not isinstance(a, Assignment):
+                continue
+            reads.update(a.rhs.atoms(Field.Access))
+        for fa in reads:
+            assert all(abs(e) <= 1 for e in fa.offsets)
+            if all(offset == 0 for offset in fa.offsets):
+                continue
+            comm_direction = inverse_direction(fa.offsets)
+            for comm_dir in comm_directions(comm_direction):
+                common_spec[(comm_dir,)].add(fa.field.center(*fa.index))
+
+    for t in timesteps:
+        spec = common_spec.copy()
+        write_accesses = get_accessor(streaming_pattern, t).write(pdf_field, stencil)
+        for comm_dir in stencil:
+            if all(d == 0 for d in comm_dir):
+                continue
+
+            for streaming_dir in set(_extend_dir(comm_dir)) & set(stencil):
+                d = stencil.index(streaming_dir)
+                fa = write_accesses[d]
+                spec[(comm_dir,)].add(fa)
+
+        if t == Timestep.EVEN:
+            class_name_suffix = 'Even'
+        elif t == Timestep.ODD:
+            class_name_suffix = 'Odd'
+        else:
+            class_name_suffix = ''
+
+        class_name = class_name_prefix + class_name_suffix
+        generate_pack_info(generation_context, class_name, spec, namespace=namespace, **create_kernel_params)
diff --git a/python/lbmpy_walberla/walberla_lbm_generation.py b/python/lbmpy_walberla/walberla_lbm_generation.py
index a7563b76c..82867d8b8 100644
--- a/python/lbmpy_walberla/walberla_lbm_generation.py
+++ b/python/lbmpy_walberla/walberla_lbm_generation.py
@@ -6,7 +6,6 @@ from jinja2 import Environment, PackageLoader, StrictUndefined, Template
 from sympy.tensor import IndexedBase
 
 import pystencils as ps
-# from lbmpy.creationfunctions import create_lb_update_rule, update_with_default_parameters
 from lbmpy.fieldaccess import CollideOnlyInplaceAccessor, StreamPullTwoFieldsAccessor
 from lbmpy.relaxationrates import relaxation_rate_scaling
 from lbmpy.stencils import get_stencil
diff --git a/python/pystencils_walberla/additional_data_handler.py b/python/pystencils_walberla/additional_data_handler.py
new file mode 100644
index 000000000..7abe7d5aa
--- /dev/null
+++ b/python/pystencils_walberla/additional_data_handler.py
@@ -0,0 +1,55 @@
+from pystencils.stencil import inverse_direction
+
+
+class AdditionalDataHandler:
+    """Base class that defines how to handle boundary conditions holding additional data."""
+
+    def __init__(self, stencil):
+        self._dim = len(stencil[0])
+
+        # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
+        if self._dim == 2:
+            self._walberla_stencil = ()
+            for d in stencil:
+                d = d + (0,)
+                self._walberla_stencil = self._walberla_stencil + (d,)
+        else:
+            self._walberla_stencil = stencil
+
+    @property
+    def constructor_arguments(self):
+        return ""
+
+    @property
+    def initialiser_list(self):
+        return ""
+
+    @property
+    def additional_arguments_for_fill_function(self):
+        return ""
+
+    @property
+    def additional_parameters_for_fill_function(self):
+        return ""
+
+    @property
+    def additional_field_data(self):
+        return ""
+
+    def data_initialisation(self, direction_index):
+        return ""
+
+    @property
+    def additional_member_variable(self):
+        return ""
+
+    @property
+    def stencil_info(self):
+        return [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(self._walberla_stencil)]
+
+    @property
+    def inverse_directions(self):
+        inv_dirs = []
+        for direction in self._walberla_stencil:
+            inv_dirs.append(self._walberla_stencil.index(inverse_direction(direction)))
+        return inv_dirs
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index 73d429ec5..0d251236c 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -1,3 +1,4 @@
+from collections import OrderedDict
 import numpy as np
 from jinja2 import Environment, PackageLoader, StrictUndefined
 from pystencils import Field, FieldType
@@ -8,6 +9,7 @@ from pystencils.boundaries.createindexlist import (
 from pystencils.data_types import TypedSymbol, create_type
 from pystencils_walberla.codegen import KernelInfo, default_create_kernel_parameters
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from pystencils_walberla.additional_data_handler import AdditionalDataHandler
 
 
 def generate_boundary(generation_context,
@@ -20,7 +22,12 @@ def generate_boundary(generation_context,
                       kernel_creation_function=None,
                       target='cpu',
                       namespace='pystencils',
+                      additional_data_handler=None,
                       **create_kernel_params):
+
+    if boundary_object.additional_data and additional_data_handler is None:
+        raise ValueError("Boundary object has additional data but you have not provided an AdditionalDataHandler.")
+
     struct_name = "IndexInfo"
     boundary_object.name = class_name
     dim = len(neighbor_stencil[0])
@@ -44,36 +51,44 @@ def generate_boundary(generation_context,
     if not kernel_creation_function:
         kernel_creation_function = create_boundary_kernel
 
-    kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, **create_kernel_params)
-    kernel.function_name = "boundary_" + boundary_object.name
-    kernel.assumed_inner_stride_one = False
-
-    # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
-    if dim == 2:
-        stencil = ()
-        for d in neighbor_stencil:
-            d = d + (0,)
-            stencil = stencil + (d,)
+    kernels = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, **create_kernel_params)
+    if isinstance(kernels, dict):
+        sweep_to_kernel_info_dict = OrderedDict()
+        dummy_kernel_info = None
+        for sweep_class, sweep_kernel in kernels.items():
+            sweep_kernel.function_name = "boundary_" + boundary_object.name + '_' + sweep_class
+            sweep_kernel.assumed_inner_stride_one = False
+            kernel_info = KernelInfo(sweep_kernel)
+            sweep_to_kernel_info_dict[sweep_class] = kernel_info
+            if dummy_kernel_info is None:
+                dummy_kernel_info = kernel_info
+            # elif not dummy_kernel_info.has_same_interface(kernel_info):
+            #     raise ValueError("Multiple boundary sweeps must have the same kernel interface!")
+        multi_sweep = True
     else:
-        stencil = neighbor_stencil
+        multi_sweep = False
+        kernel = kernels
+        kernel.function_name = "boundary_" + boundary_object.name
+        kernel.assumed_inner_stride_one = False
+        kernel_info = KernelInfo(kernel)
+        sweep_to_kernel_info_dict = {'': kernel_info}
+        dummy_kernel_info = kernel_info
 
-    stencil_info = [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(stencil)]
-    inv_dirs = []
-    for direction in stencil:
-        inverse_dir = tuple([-i for i in direction])
-        inv_dirs.append(stencil.index(inverse_dir))
+    if additional_data_handler is None:
+        additional_data_handler = AdditionalDataHandler(stencil=neighbor_stencil)
 
     context = {
         'class_name': boundary_object.name,
+        'sweep_classes': sweep_to_kernel_info_dict,
+        'multi_sweep': multi_sweep,
+        'dummy_kernel_info': dummy_kernel_info,
         'StructName': struct_name,
         'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype),
-        'kernel': KernelInfo(kernel),
-        'stencil_info': stencil_info,
-        'inverse_directions': inv_dirs,
         'dim': dim,
         'target': target,
         'namespace': namespace,
-        'inner_or_boundary': boundary_object.inner_or_boundary
+        'inner_or_boundary': boundary_object.inner_or_boundary,
+        'additional_data_handler': additional_data_handler
     }
 
     env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
@@ -83,8 +98,8 @@ def generate_boundary(generation_context,
     source = env.get_template('Boundary.tmpl.cpp').render(**context)
 
     source_extension = "cpp" if target == "cpu" else "cu"
-    generation_context.write_file("{}.h".format(class_name), header)
-    generation_context.write_file("{}.{}".format(class_name, source_extension), source)
+    generation_context.write_file(f"{class_name}.h", header)
+    generation_context.write_file(f"{class_name}.{source_extension}", source)
 
 
 def generate_staggered_boundary(generation_context, class_name, boundary_object,
@@ -102,23 +117,23 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob
 
 
 def struct_from_numpy_dtype(struct_name, numpy_dtype):
-    result = "struct %s { \n" % (struct_name,)
+    result = f"struct {struct_name} {{ \n"
 
     equality_compare = []
     constructor_params = []
     constructor_initializer_list = []
     for name, (sub_type, offset) in numpy_dtype.fields.items():
         pystencils_type = create_type(sub_type)
-        result += "    %s %s;\n" % (pystencils_type, name)
+        result += f"    {pystencils_type} {name};\n"
         if name in boundary_index_array_coordinate_names or name == direction_member_name:
-            constructor_params.append("%s %s_" % (pystencils_type, name))
-            constructor_initializer_list.append("%s(%s_)" % (name, name))
+            constructor_params.append(f"{pystencils_type} {name}_")
+            constructor_initializer_list.append(f"{name}({name}_)")
         else:
-            constructor_initializer_list.append("%s()" % name)
+            constructor_initializer_list.append(f"{name}()")
         if pystencils_type.is_float():
-            equality_compare.append("floatIsEqual(%s, o.%s)" % (name, name))
+            equality_compare.append(f"floatIsEqual({name}, o.{name})")
         else:
-            equality_compare.append("%s == o.%s" % (name, name))
+            equality_compare.append(f"{name} == o.{name}")
 
     result += "    %s(%s) : %s {}\n" % \
               (struct_name, ", ".join(constructor_params), ", ".join(constructor_initializer_list))
diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py
index 37013cba6..896a795cc 100644
--- a/python/pystencils_walberla/codegen.py
+++ b/python/pystencils_walberla/codegen.py
@@ -20,7 +20,7 @@ __all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field
 
 def generate_sweep(generation_context, class_name, assignments,
                    namespace='pystencils', field_swaps=(), staggered=False, varying_parameters=(),
-                   inner_outer_split=False,
+                   inner_outer_split=False, ghost_layers_to_include=0,
                    **create_kernel_params):
     """Generates a waLBerla sweep from a pystencils representation.
 
@@ -44,6 +44,8 @@ def generate_sweep(generation_context, class_name, assignments,
                             the C++ class constructor even if the kernel does not need them.
         inner_outer_split: if True generate a sweep that supports separate iteration over inner and outer regions
                            to allow for communication hiding.
+        ghost_layers_to_include: determines how many ghost layers should be included for the Sweep.
+                                 This is relevant if a setter kernel should also set correct values to the ghost layers.
         **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
     """
     create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
@@ -78,6 +80,7 @@ def generate_sweep(generation_context, class_name, assignments,
             'class_name': class_name,
             'target': create_kernel_params.get("target", "cpu"),
             'headers': get_headers(ast),
+            'ghost_layers_to_include': ghost_layers_to_include
         }
         header = env.get_template("Sweep.tmpl.h").render(**jinja_context)
         source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context)
@@ -93,6 +96,7 @@ def generate_sweep(generation_context, class_name, assignments,
             'target': create_kernel_params.get("target", "cpu"),
             'field': representative_field,
             'headers': get_headers(ast),
+            'ghost_layers_to_include': 0
         }
         header = env.get_template("SweepInnerOuter.tmpl.h").render(**jinja_context)
         source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**jinja_context)
@@ -132,7 +136,7 @@ def generate_pack_info_from_kernel(generation_context, class_name: str, assignme
         class_name: name of the generated class
         assignments: list of assignments from the compute kernel - generates PackInfo for "pull" part only
                      i.e. the kernel is expected to only write to the center
-        kind:
+        kind: can either be pull or push
         **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
     """
     assert kind in ('push', 'pull')
@@ -240,7 +244,6 @@ def generate_pack_info(generation_context, class_name: str,
         pack_kernels[direction_strings] = KernelInfo(pack_ast)
         unpack_kernels[direction_strings] = KernelInfo(unpack_ast)
         elements_per_cell[direction_strings] = len(terms)
-
     fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params)
     fused_kernel.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
 
@@ -333,6 +336,12 @@ class KernelInfo:
         self.varying_parameters = tuple(varying_parameters)
         self.parameters = ast.get_parameters()  # cache parameters here
 
+    def has_same_interface(self, other):
+        return self.temporary_fields == other.temporary_fields \
+            and self.field_swaps == other.field_swaps \
+            and self.varying_parameters == other.varying_parameters \
+            and self.parameters == other.parameters
+
 
 def get_vectorize_instruction_set(generation_context):
     if generation_context.optimize_for_localhost:
diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py
index 60903be9e..7158f8168 100644
--- a/python/pystencils_walberla/jinja_filters.py
+++ b/python/pystencils_walberla/jinja_filters.py
@@ -338,6 +338,26 @@ def generate_constructor_parameters(kernel_info, parameters_to_ignore=None):
     return ", ".join(parameter_list + varying_parameters)
 
 
+def generate_constructor_call_arguments(kernel_info, parameters_to_ignore=None):
+    if parameters_to_ignore is None:
+        parameters_to_ignore = []
+
+    varying_parameters = []
+    if hasattr(kernel_info, 'varying_parameters'):
+        varying_parameters = kernel_info.varying_parameters
+    varying_parameter_names = tuple(e[1] for e in varying_parameters)
+    parameters_to_ignore += kernel_info.temporary_fields + varying_parameter_names
+
+    parameter_list = []
+    for param in kernel_info.parameters:
+        if param.is_field_pointer and param.field_name not in parameters_to_ignore:
+            parameter_list.append("%sID" % (param.field_name, ))
+        elif not param.is_field_parameter and param.symbol.name not in parameters_to_ignore:
+            parameter_list.append(f'{param.symbol.name}_')
+    varying_parameters = ["%s_" % e for e in varying_parameter_names]
+    return ", ".join(parameter_list + varying_parameters)
+
+
 @jinja2.contextfilter
 def generate_members(ctx, kernel_info, parameters_to_ignore=(), only_fields=False):
     ast = kernel_info.ast
@@ -382,14 +402,25 @@ def generate_destructor(kernel_info, class_name):
         return temporary_constructor.format(contents=contents, class_name=class_name)
 
 
+@jinja2.contextfilter
+def nested_class_method_definition_prefix(ctx, nested_class_name):
+    outer_class = ctx['class_name']
+    if len(nested_class_name) == 0:
+        return outer_class
+    else:
+        return outer_class + '::' + nested_class_name
+
+
 def add_pystencils_filters_to_jinja_env(jinja_env):
     jinja_env.filters['generate_definition'] = generate_definition
     jinja_env.filters['generate_declaration'] = generate_declaration
     jinja_env.filters['generate_members'] = generate_members
     jinja_env.filters['generate_constructor_parameters'] = generate_constructor_parameters
     jinja_env.filters['generate_constructor_initializer_list'] = generate_constructor_initializer_list
+    jinja_env.filters['generate_constructor_call_arguments'] = generate_constructor_call_arguments
     jinja_env.filters['generate_call'] = generate_call
     jinja_env.filters['generate_block_data_to_field_extraction'] = generate_block_data_to_field_extraction
     jinja_env.filters['generate_swaps'] = generate_swaps
     jinja_env.filters['generate_refs_for_kernel_parameters'] = generate_refs_for_kernel_parameters
     jinja_env.filters['generate_destructor'] = generate_destructor
+    jinja_env.filters['nested_class_method_definition_prefix'] = nested_class_method_definition_prefix
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.cpp b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
index 33f799107..ef1ec3b7b 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
@@ -51,7 +51,11 @@ namespace {{namespace}} {
 #endif
 
 
-{{kernel|generate_definition}}
+{% for sweep_class, sweep_kernel in sweep_classes.items() %}
+
+{{sweep_kernel|generate_definition}}
+
+{% endfor %}
 
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
@@ -61,8 +65,10 @@ namespace {{namespace}} {
 #pragma pop
 #endif
 
+{% for sweep_class, sweep_kernel in sweep_classes.items() %}
+
 
-void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%})
+void {{sweep_class|nested_class_method_definition_prefix}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%})
 {
     auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
     int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
@@ -77,25 +83,27 @@ void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target =
 
     uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
 
-    {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
-    {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
+    {{sweep_kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
+    {{sweep_kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize'], ignore_fields=True)|indent(4) }}
+    {{sweep_kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
 }
 
-void {{class_name}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
+void {{sweep_class|nested_class_method_definition_prefix}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
 {
     run( block, IndexVectors::ALL{% if target == 'gpu'%}, stream {%endif%});
 }
 
-void {{class_name}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
+void {{sweep_class|nested_class_method_definition_prefix}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
 {
     run( block, IndexVectors::INNER{% if target == 'gpu'%}, stream {%endif%} );
 }
 
-void {{class_name}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
+void {{sweep_class|nested_class_method_definition_prefix}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
 {
     run( block, IndexVectors::OUTER{% if target == 'gpu'%}, stream {%endif%} );
 }
 
+{% endfor %}
 
 } // namespace {{namespace}}
 } // namespace walberla
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h
index c856adcea..c0a39c32b 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.h
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.h
@@ -17,13 +17,13 @@
 //! \\author pystencils
 //======================================================================================================================
 
-
 #include "core/DataTypes.h"
 
 {% if target is equalto 'cpu' -%}
 #include "field/GhostLayerField.h"
 {%- elif target is equalto 'gpu' -%}
 #include "cuda/GPUField.h"
+#include "cuda/FieldCopy.h"
 {%- endif %}
 #include "domain_decomposition/BlockDataID.h"
 #include "domain_decomposition/IBlock.h"
@@ -64,7 +64,7 @@ public:
             NUM_TYPES = 3
         };
 
-        IndexVectors() : cpuVectors_(NUM_TYPES)  {}
+        IndexVectors() = default;
         bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; }
 
         {% if target == 'gpu' -%}
@@ -72,14 +72,14 @@ public:
             for( auto & gpuVec: gpuVectors_)
                 cudaFree( gpuVec );
         }
-        {% endif %}
+        {% endif -%}
 
         CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
         {{StructName}} * pointerCpu(Type t)  { return &(cpuVectors_[t][0]); }
 
         {% if target == 'gpu' -%}
         {{StructName}} * pointerGpu(Type t)  { return gpuVectors_[t]; }
-        {% endif %}
+        {% endif -%}
 
         void syncGPU()
         {
@@ -100,39 +100,66 @@ public:
         }
 
     private:
-        std::vector<CpuIndexVector> cpuVectors_;
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
 
         {% if target == 'gpu' -%}
         using GpuIndexVector = {{StructName}} *;
         std::vector<GpuIndexVector> gpuVectors_;
-        {% endif %}
+        {%- endif %}
+    };
+
+    {% if multi_sweep %}
+    {% for sweep_class_name, sweep_kernel_info in sweep_classes.items() %}
+    class {{sweep_class_name}}
+    {
+        public:
+            {{sweep_class_name}} ( {{sweep_kernel_info|generate_constructor_parameters(['indexVectorSize'])}} )
+                : {{ sweep_kernel_info|generate_constructor_initializer_list(['indexVectorSize']) }} {};
+
+            void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
+            void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
+            void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
+        
+        private:
+            void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
+
+            {{sweep_kernel_info|generate_members(['indexVectorSize'])|indent(12)}}
     };
 
+    {{sweep_class_name}} get{{sweep_class_name}} ()
+    {
+        return {{sweep_class_name}} ( {{sweep_kernel_info|generate_constructor_call_arguments(['indexVectorSize'])|indent(12)}} );
+    }
+    {% endfor %}
+    {% endif %}
 
     {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks,
-                   {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}} )
-        : {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
+                   {{dummy_kernel_info|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}})
+        :{{additional_data_handler.initialiser_list}} {{ dummy_kernel_info|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
     {
         auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
         indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}");
     };
 
+    {% if not multi_sweep %}
+
     void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
     void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
     void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
 
+    {% endif %}
 
     template<typename FlagField_T>
     void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
                             FlagUID boundaryFlagUID, FlagUID domainFlagUID)
     {
         for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
-            fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+            fillFromFlagField<FlagField_T>({{additional_data_handler.additional_arguments_for_fill_function}}&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
     }
 
 
     template<typename FlagField_T>
-    void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID,
+    void fillFromFlagField({{additional_data_handler.additional_parameters_for_fill_function}}IBlock * block, ConstBlockDataID flagFieldID,
                             FlagUID boundaryFlagUID, FlagUID domainFlagUID )
     {
         auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
@@ -140,8 +167,8 @@ public:
         auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
         auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
 
-
         auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        {{additional_data_handler.additional_field_data|indent(4)}}
 
         if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
             return;
@@ -152,7 +179,6 @@ public:
         auto inner = flagField->xyzSize();
         inner.expand( cell_idx_t(-1) );
 
-
         indexVectorAll.clear();
         indexVectorInner.clear();
         indexVectorOuter.clear();
@@ -161,15 +187,15 @@ public:
         {
             if( ! isFlagSet(it, domainFlag) )
                 continue;
-
-            {%- for dirIdx, dirVec, offset in stencil_info %}
+            {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %}
             if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) )
             {
                 {% if inner_or_boundary -%}
                 auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} );
                 {% else -%}
-                auto element = {{StructName}}(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), {%if dim == 3%} it.z() + cell_idx_c({{dirVec[2]}}), {%endif %} {{inverse_directions[dirIdx]}} );
+                auto element = {{StructName}}(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), {%if dim == 3%} it.z() + cell_idx_c({{dirVec[2]}}), {%endif %} {{additional_data_handler.inverse_directions[dirIdx]}} );
                 {% endif -%}
+                {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}}
                 indexVectorAll.push_back( element );
                 if( inner.contains( it.x(), it.y(), it.z() ) )
                     indexVectorInner.push_back( element );
@@ -178,16 +204,18 @@ public:
             }
             {% endfor %}
         }
-
         indexVectors->syncGPU();
     }
 
 private:
+    {% if not multi_sweep %}
     void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
+    {% endif %}
 
     BlockDataID indexVectorID;
+    {{additional_data_handler.additional_member_variable|indent(4)}}
 public:
-    {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}}
+    {{dummy_kernel_info|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}}
 };
 
 
diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.cpp b/python/pystencils_walberla/templates/Sweep.tmpl.cpp
index b3b12c6b3..b26d9c6db 100644
--- a/python/pystencils_walberla/templates/Sweep.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Sweep.tmpl.cpp
@@ -59,7 +59,7 @@ void {{class_name}}::operator()( IBlock * block{%if target is equalto 'gpu'%} ,
 {
     {{kernel|generate_block_data_to_field_extraction|indent(4)}}
     {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
-    {{kernel|generate_call(stream='stream')|indent(4)}}
+    {{kernel|generate_call(ghost_layers_to_include=ghost_layers_to_include, stream='stream')|indent(4)}}
     {{kernel|generate_swaps|indent(4)}}
 }
 
diff --git a/src/lbm/field/QCriterion.h b/src/lbm/field/QCriterion.h
index eff9b8015..c68111778 100644
--- a/src/lbm/field/QCriterion.h
+++ b/src/lbm/field/QCriterion.h
@@ -28,36 +28,40 @@
 // You should never use these functions directly, always refer to the member functions
 // of PdfField or the free functions that can be found in MacroscopicValueCalculation.h
 
-namespace walberla {
-namespace lbm {
-
-struct QCriterion {
+namespace walberla
+{
+namespace lbm
+{
+struct QCriterion
+{
    template< typename VelocityField_T, typename Filter_T >
-   static inline real_t get(const VelocityField_T & velocityField, const Filter_T & filter,
-           const cell_idx_t x, const cell_idx_t y, const cell_idx_t z,
-           real_t dx = real_t(1), real_t dy = real_t(1), real_t dz = real_t(1)) {
+   static inline real_t get(const VelocityField_T& velocityField, const Filter_T& filter, const cell_idx_t x,
+                            const cell_idx_t y, const cell_idx_t z, real_t dx = real_t(1), real_t dy = real_t(1),
+                            real_t dz = real_t(1))
+   {
       const auto one = cell_idx_t(1);
 
-      if(filter(x,y,z) && filter(x+one,y,z) && filter(x-one,y,z) && filter(x,y+one,z)
-         && filter(x,y-one,z) && filter(x,y,z+one) && filter(x,y,z-one)) {
-         const Vector3<real_t> xa = velocityField.get(x+one,y,z);
-         const Vector3<real_t> xb = velocityField.get(x-one,y,z);
-         const Vector3<real_t> ya = velocityField.get(x,y+one,z);
-         const Vector3<real_t> yb = velocityField.get(x,y-one,z);
-         const Vector3<real_t> za = velocityField.get(x,y,z+one);
-         const Vector3<real_t> zb = velocityField.get(x,y,z-one);
+      auto f(velocityField.flattenedShallowCopy());
+
+      if (filter(x, y, z) && filter(x + one, y, z) && filter(x - one, y, z) && filter(x, y + one, z) &&
+          filter(x, y - one, z) && filter(x, y, z + one) && filter(x, y, z - one))
+      {
+         Vector3< real_t > xa(f->get(x + one, y, z, 0), f->get(x + one, y, z, 1), f->get(x + one, y, z, 2));
+         Vector3< real_t > xb(f->get(x - one, y, z, 0), f->get(x - one, y, z, 1), f->get(x - one, y, z, 2));
+         Vector3< real_t > ya(f->get(x, y + one, z, 0), f->get(x, y + one, z, 1), f->get(x, y + one, z, 2));
+         Vector3< real_t > yb(f->get(x, y - one, z, 0), f->get(x, y - one, z, 1), f->get(x, y - one, z, 2));
+         Vector3< real_t > za(f->get(x, y, z + one, 0), f->get(x, y, z + one, 1), f->get(x, y, z + one, 2));
+         Vector3< real_t > zb(f->get(x, y, z - one, 0), f->get(x, y, z - one, 1), f->get(x, y, z - one, 2));
 
          return calculate(xa, xb, ya, yb, za, zb, dx, dy, dz);
       }
-
       return real_t(0);
    }
 
-   static inline real_t calculate(const Vector3<real_t> xa, const Vector3<real_t> xb,
-           const Vector3<real_t> ya, const Vector3<real_t> yb,
-           const Vector3<real_t> za, const Vector3<real_t> zb,
-           const real_t dx, const real_t dy, const real_t dz) {
-
+   static inline real_t calculate(const Vector3< real_t > xa, const Vector3< real_t > xb, const Vector3< real_t > ya,
+                                  const Vector3< real_t > yb, const Vector3< real_t > za, const Vector3< real_t > zb,
+                                  const real_t dx, const real_t dy, const real_t dz)
+   {
       const auto halfInvDx = real_t(0.5) / dx;
       const auto halfInvDy = real_t(0.5) / dy;
       const auto halfInvDz = real_t(0.5) / dz;
@@ -75,18 +79,17 @@ struct QCriterion {
       const real_t duzdz = (za[2] - zb[2]) * halfInvDz;
 
       // Q = 1/2 * (||W||² - ||S||²)
-      real_t sNormSq = duxdx*duxdx + duydy*duydy + duzdz*duzdz +
-              real_t(0.5)*(duxdy+duydx)*(duxdy+duydx) + real_t(0.5)*(duydz+duzdy)*(duydz+duzdy) +
-              real_t(0.5)*(duxdz+duzdx)*(duxdz+duzdx);
+      real_t sNormSq = duxdx * duxdx + duydy * duydy + duzdz * duzdz + real_t(0.5) * (duxdy + duydx) * (duxdy + duydx) +
+                       real_t(0.5) * (duydz + duzdy) * (duydz + duzdy) +
+                       real_t(0.5) * (duxdz + duzdx) * (duxdz + duzdx);
 
-      real_t omegaNormSq = real_t(0.5)*(duxdz-duzdx)*(duxdz-duzdx) +
-              real_t(0.5)*(duxdy-duydx)*(duxdy-duydx) +
-              real_t(0.5)*(duydz-duzdy)*(duydz-duzdy);
+      real_t omegaNormSq = real_t(0.5) * (duxdz - duzdx) * (duxdz - duzdx) +
+                           real_t(0.5) * (duxdy - duydx) * (duxdy - duydx) +
+                           real_t(0.5) * (duydz - duzdy) * (duydz - duzdy);
 
       return real_t(0.5) * (omegaNormSq - sNormSq);
    }
 };
 
-
 } // namespace lbm
 } // namespace walberla
diff --git a/src/timeloop/SweepTimeloop.cpp b/src/timeloop/SweepTimeloop.cpp
index 7fc72a38a..15b407191 100644
--- a/src/timeloop/SweepTimeloop.cpp
+++ b/src/timeloop/SweepTimeloop.cpp
@@ -54,10 +54,8 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors)
          }
 
          Sweep * selectedSweep = s.sweep.getUnique( selectors + bi->getState() );
-         if( !selectedSweep )
-            WALBERLA_ABORT("Selecting Sweep " << sweepIt->first << ": " <<
-                           "Ambiguous, or no sweep selected. Check your selector " <<
-                            selectors + bi->getState() << std::endl << s.sweep);
+         if (!selectedSweep)
+            continue;
 
          WALBERLA_LOG_PROGRESS_SECTION()
          {
@@ -111,9 +109,7 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors, WcTimingPool &timing)
          Sweep * selectedSweep = s.sweep.getUnique( selectors + bi->getState(), sweepName );
 
          if( !selectedSweep )
-            WALBERLA_ABORT("Selecting Sweep " << sweepIt->first << ": " <<
-                           "Ambiguous, or no sweep selected. Check your selector " <<
-                            selectors + bi->getState() << std::endl << s.sweep);
+            continue;
 
          WALBERLA_LOG_PROGRESS("Running sweep \"" << sweepName << "\" on block " << bi->getId() );
 
diff --git a/tests/lbm/CMakeLists.txt b/tests/lbm/CMakeLists.txt
index b17f65cfa..a2ddd6690 100644
--- a/tests/lbm/CMakeLists.txt
+++ b/tests/lbm/CMakeLists.txt
@@ -76,6 +76,21 @@ waLBerla_execute_test( NAME QCriterionTest )
 
 # Code Generation
 if( WALBERLA_BUILD_WITH_CODEGEN )
+add_subdirectory(codegen)
+
+waLBerla_generate_target_from_python(NAME GeneratedOutflowBCGenerated
+        FILE codegen/GeneratedOutflowBC.py
+        OUT_FILES GeneratedOutflowBC_Sweep.cpp GeneratedOutflowBC_Sweep.h
+        GeneratedOutflowBC_MacroSetter.cpp GeneratedOutflowBC_MacroSetter.h
+        GeneratedOutflowBC_Dynamic_UBB.cpp GeneratedOutflowBC_Dynamic_UBB.h
+        GeneratedOutflowBC_Static_UBB.cpp GeneratedOutflowBC_Static_UBB.h
+        GeneratedOutflowBC_NoSlip.cpp GeneratedOutflowBC_NoSlip.h
+        GeneratedOutflowBC_Outflow.cpp GeneratedOutflowBC_Outflow.h
+        GeneratedOutflowBC_PackInfo.cpp GeneratedOutflowBC_PackInfo.h
+        GeneratedOutflowBC_InfoHeader.h)
+waLBerla_compile_test( FILES codegen/GeneratedOutflowBC.cpp DEPENDS GeneratedOutflowBCGenerated)
+waLBerla_execute_test( NAME GeneratedOutflowBC COMMAND $<TARGET_FILE:GeneratedOutflowBC> ${CMAKE_CURRENT_SOURCE_DIR}/codegen/GeneratedOutflowBC.prm  )
+
 
 waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated
       FILE codegen/LbCodeGenerationExample.py
@@ -95,4 +110,13 @@ waLBerla_generate_target_from_python(NAME FieldLayoutAndVectorizationTestGenerat
                                                FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h )
 waLBerla_compile_test( FILES codegen/FieldLayoutAndVectorizationTest.cpp DEPENDS FieldLayoutAndVectorizationTestGenerated)
 
-endif()
\ No newline at end of file
+waLBerla_generate_target_from_python(NAME LbmPackInfoGenerationTestCodegen FILE codegen/LbmPackInfoGenerationTest.py
+                                     OUT_FILES AccessorBasedPackInfoEven.cpp AccessorBasedPackInfoEven.h
+                                               AccessorBasedPackInfoOdd.cpp AccessorBasedPackInfoOdd.h
+                                               FromKernelPackInfoPull.cpp FromKernelPackInfoPull.h
+                                               FromKernelPackInfoPush.cpp FromKernelPackInfoPush.h)
+
+waLBerla_link_files_to_builddir( "diff_packinfos.sh" )
+waLBerla_execute_test( NAME LbmPackInfoGenerationDiffTest COMMAND bash diff_packinfos.sh )
+
+endif()
diff --git a/tests/lbm/codegen/CMakeLists.txt b/tests/lbm/codegen/CMakeLists.txt
new file mode 100644
index 000000000..b149eda3f
--- /dev/null
+++ b/tests/lbm/codegen/CMakeLists.txt
@@ -0,0 +1,8 @@
+#############################################################################################################################
+#
+# Tests for lbm module
+#
+#############################################################################################################################
+
+waLBerla_link_files_to_builddir( "*.prm" )
+waLBerla_link_files_to_builddir( "ChannelFlowCodeGenParameter.py" )
\ No newline at end of file
diff --git a/tests/lbm/codegen/GeneratedOutflowBC.cpp b/tests/lbm/codegen/GeneratedOutflowBC.cpp
new file mode 100644
index 000000000..66311c7eb
--- /dev/null
+++ b/tests/lbm/codegen/GeneratedOutflowBC.cpp
@@ -0,0 +1,194 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GeneratedOutflowBC.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//! \brief Shear flow with dynamic UBB on the left (W) with linear flow profile from zero to u_max (flow in x-direction)
+//!        along the y-direction. The upper wall (N) is a static UBB which applies u_max in x-direction.
+//!        On the right (E) an outflow boundary is used and the lower wall (S) is a No-Slip wall.
+//!        It is tested if the shear flow is correctly propagated through the entire domain after n timesteps.
+//
+//======================================================================================================================
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/Environment.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#include "field/AddToStorage.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+#include "timeloop/SweepTimeloop.h"
+
+// Generated Files
+#include "GeneratedOutflowBC_Dynamic_UBB.h"
+#include "GeneratedOutflowBC_InfoHeader.h"
+#include "GeneratedOutflowBC_MacroSetter.h"
+#include "GeneratedOutflowBC_NoSlip.h"
+#include "GeneratedOutflowBC_Outflow.h"
+#include "GeneratedOutflowBC_PackInfo.h"
+#include "GeneratedOutflowBC_Static_UBB.h"
+#include "GeneratedOutflowBC_Sweep.h"
+
+using namespace walberla;
+
+using PackInfo_T  = lbm::GeneratedOutflowBC_PackInfo;
+using flag_t      = walberla::uint8_t;
+using FlagField_T = FlagField< flag_t >;
+
+auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
+   return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block),
+                         storage->getNumberOfZCells(*block), uint_t(1), field::fzyx,
+                         make_shared< field::AllocateAligned< real_t, 64 > >());
+};
+
+////////////////////////////////////////////
+// Linear Velocity Profile for left wall //
+//////////////////////////////////////////
+
+class ShearProfile
+{
+ public:
+
+   ShearProfile( real_t inflow_velocity ) :
+      inflow_velocity_( inflow_velocity ) {}
+
+   Vector3< real_t > operator()( const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block ) const;
+
+ private:
+
+   const real_t inflow_velocity_;
+}; // class ShearProfile
+
+Vector3< real_t > ShearProfile::operator()( const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block ) const
+{
+   Cell globalCell;
+   CellInterval domain = SbF->getDomainCellBB();
+   real_t h_y          = domain.yMax() - domain.yMin();
+   SbF->transformBlockLocalToGlobalCell(globalCell, block, pos);
+
+   real_t u = inflow_velocity_ * (globalCell[1] / h_y);
+
+   Vector3< real_t > result(u, 0.0, 0.0);
+   return result;
+}
+
+//////////
+// MAIN //
+//////////
+
+int main(int argc, char** argv)
+{
+   walberla::Environment walberlaEnv(argc, argv);
+
+   auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config());
+
+   // read parameters
+   auto parameters = walberlaEnv.config()->getOneBlock("Parameters");
+
+   const real_t omega     = parameters.getParameter< real_t >("omega", real_c(1.4));
+   const real_t u_max     = parameters.getParameter< real_t >("u_max", real_t(0.05));
+   const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10));
+
+   const double remainingTimeLoggerFrequency =
+      parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
+
+   // create fields
+   BlockDataID pdfFieldID     = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs");
+   BlockDataID velFieldID     = field::addToStorage< VelocityField_T >(blocks, "velocity", real_t(0), field::fzyx);
+   BlockDataID densityFieldID = field::addToStorage< ScalarField_T >(blocks, "density", real_t(0), field::fzyx);
+
+   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+
+   pystencils::GeneratedOutflowBC_MacroSetter setterSweep(pdfFieldID, velFieldID);
+   for (auto& block : *blocks)
+      setterSweep(&block);
+
+   // create and initialize boundary handling
+   const FlagUID fluidFlagUID("Fluid");
+
+   auto boundariesConfig = walberlaEnv.config()->getOneBlock("Boundaries");
+
+   ShearProfile velocityCallback{u_max};
+   std::function< Vector3< real_t >(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) >
+      velocity_initialisation = velocityCallback;
+
+   lbm::GeneratedOutflowBC_Dynamic_UBB ubb_dynamic(blocks, pdfFieldID, velocity_initialisation);
+   lbm::GeneratedOutflowBC_Static_UBB ubb_static(blocks, pdfFieldID, u_max);
+   lbm::GeneratedOutflowBC_NoSlip noSlip(blocks, pdfFieldID);
+   lbm::GeneratedOutflowBC_Outflow outflow(blocks, pdfFieldID);
+
+   geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
+
+   ubb_dynamic.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB_Inflow"), fluidFlagUID);
+   ubb_static.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB_Wall"), fluidFlagUID);
+   noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID);
+   outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("Outflow"), fluidFlagUID);
+
+   // create time loop
+   SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
+
+   // create communication for PdfField
+   blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
+   communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldID));
+
+   pystencils::GeneratedOutflowBC_Sweep UpdateSweep(densityFieldID, pdfFieldID, velFieldID, omega);
+
+   // add LBM sweep and communication to time loop
+   timeloop.add() << BeforeFunction(communication, "communication") << Sweep(noSlip, "noSlip boundary");
+   timeloop.add() << Sweep(ubb_dynamic, "ubb inflow");
+   timeloop.add() << Sweep(ubb_static, "ubb wall");
+   timeloop.add() << Sweep(outflow, "outflow boundary");
+   timeloop.add() << Sweep(UpdateSweep, "LB stream & collide");
+
+   // log remaining time
+   timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+                                 "remaining time logger");
+
+   // VTK Writer
+   uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+   if (vtkWriteFrequency > 0)
+   {
+      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "GeneratedOutflowBC_VTK", vtkWriteFrequency, 0, false,
+                                                      "vtk_out", "simulation_step", false, true, true, false, 0);
+
+      auto velWriter     = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity");
+      auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density");
+
+      vtkOutput->addCellDataWriter(velWriter);
+      vtkOutput->addCellDataWriter(densityWriter);
+
+      timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+   }
+   timeloop.run();
+
+   CellInterval domain = blocks->getDomainCellBB();
+   real_t h_y          = domain.yMax() - domain.yMin();
+   for (auto& block : *blocks)
+   {
+      auto velField = block.getData<VelocityField_T>(velFieldID);
+      WALBERLA_FOR_ALL_CELLS_XYZ
+      (
+         velField,
+   Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(velField->get(x, y, z, 0), u_max * (globalCell[1] / h_y), 0.01)
+      )
+   }
+
+   return EXIT_SUCCESS;
+}
diff --git a/tests/lbm/codegen/GeneratedOutflowBC.prm b/tests/lbm/codegen/GeneratedOutflowBC.prm
new file mode 100644
index 000000000..b53043d9b
--- /dev/null
+++ b/tests/lbm/codegen/GeneratedOutflowBC.prm
@@ -0,0 +1,24 @@
+
+Parameters 
+{
+	omega           1.8;
+	timesteps       500;
+	u_max           0.05;
+	vtkWriteFrequency 0;
+	remainingTimeLoggerFrequency 3; // in seconds
+}
+
+DomainSetup
+{
+   blocks        <  1,    1, 1 >;
+   cellsPerBlock <  30, 30, 1 >;
+   periodic      <  0,    0, 0 >;
+}
+
+Boundaries 
+{
+	Border { direction W;    walldistance -1;  flag UBB_Inflow; }
+	Border { direction E;    walldistance -1;  flag Outflow; }
+    Border { direction S;    walldistance -1;  flag NoSlip; }
+    Border { direction N;    walldistance -1;  flag UBB_Wall; }
+}
diff --git a/tests/lbm/codegen/GeneratedOutflowBC.py b/tests/lbm/codegen/GeneratedOutflowBC.py
new file mode 100644
index 000000000..59a916fec
--- /dev/null
+++ b/tests/lbm/codegen/GeneratedOutflowBC.py
@@ -0,0 +1,92 @@
+from pystencils.field import fields
+from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
+from lbmpy.stencils import get_stencil
+from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
+from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
+from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler
+from pystencils_walberla import CodeGeneration, generate_sweep
+from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lb_pack_info
+
+import sympy as sp
+
+stencil = get_stencil("D2Q9")
+q = len(stencil)
+dim = len(stencil[0])
+
+pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}): double[{dim}D]", layout='fzyx')
+velocity_field, density_field = fields(f"velocity({dim}), density(1) : double[{dim}D]", layout='fzyx')
+omega = sp.Symbol("omega")
+u_max = sp.Symbol("u_max")
+
+output = {
+    'density': density_field,
+    'velocity': velocity_field
+}
+
+options = {'method': 'cumulant',
+           'stencil': stencil,
+           'relaxation_rate': omega,
+           'galilean_correction': len(stencil) == 27,
+           'field_name': 'pdfs',
+           'output': output,
+           'optimization': {'symbolic_field': pdfs,
+                            'symbolic_temporary_field': pdfs_tmp,
+                            'cse_global': False,
+                            'cse_pdfs': False}}
+
+method = create_lb_method(**options)
+
+# getter & setter
+setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector,
+                                               pdfs=pdfs, density=1)
+
+# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
+
+update_rule = create_lb_update_rule(lb_method=method, **options)
+
+info_header = f"""
+using namespace walberla;
+#include "stencil/D{dim}Q{q}.h"
+using Stencil_T = walberla::stencil::D{dim}Q{q};
+using PdfField_T = GhostLayerField<real_t, {q}>;
+using VelocityField_T = GhostLayerField<real_t, {dim}>;
+using ScalarField_T = GhostLayerField<real_t, 1>;
+    """
+
+stencil = method.stencil
+
+with CodeGeneration() as ctx:
+    # sweeps
+    generate_sweep(ctx, 'GeneratedOutflowBC_Sweep', update_rule, field_swaps=[(pdfs, pdfs_tmp)])
+    generate_sweep(ctx, 'GeneratedOutflowBC_MacroSetter', setter_assignments)
+
+    # boundaries
+    ubb_dynamic = UBB(lambda *args: None, dim=dim)
+    ubb_data_handler = UBBAdditionalDataHandler(stencil, ubb_dynamic)
+
+    if dim == 2:
+        ubb_static = UBB([sp.Symbol("u_max"), 0])
+    else:
+        ubb_static = UBB([sp.Symbol("u_max"), 0, 0])
+
+    outflow = ExtrapolationOutflow(stencil[4], method)
+    outflow_data_handler = OutflowAdditionalDataHandler(stencil, outflow)
+
+    # Dynamic UBB which is used to produce a specific velocity profile at the inflow.
+    # Note that the additional data handler is needed for that kind of boundary.
+    generate_boundary(ctx, 'GeneratedOutflowBC_Dynamic_UBB', ubb_dynamic, method,
+                      additional_data_handler=ubb_data_handler)
+
+    # Static UBB which is used to apply a certain velocity u_max at the upper wall in x-direction
+    generate_boundary(ctx, 'GeneratedOutflowBC_Static_UBB', ubb_static, method)
+
+    generate_boundary(ctx, 'GeneratedOutflowBC_NoSlip', NoSlip(), method)
+
+    generate_boundary(ctx, 'GeneratedOutflowBC_Outflow', outflow, method,
+                      additional_data_handler=outflow_data_handler)
+
+    # communication
+    generate_lb_pack_info(ctx, 'GeneratedOutflowBC_PackInfo', stencil, pdfs)
+
+    # Info header containing correct template definitions for stencil and field
+    ctx.write_file("GeneratedOutflowBC_InfoHeader.h", info_header)
diff --git a/tests/lbm/codegen/LbmPackInfoGenerationTest.py b/tests/lbm/codegen/LbmPackInfoGenerationTest.py
new file mode 100644
index 000000000..5fab9c7ab
--- /dev/null
+++ b/tests/lbm/codegen/LbmPackInfoGenerationTest.py
@@ -0,0 +1,29 @@
+from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_update_rule
+from lbmpy.advanced_streaming import Timestep
+from lbmpy.stencils import get_stencil
+from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel
+from lbmpy_walberla.packinfo import generate_lb_pack_info
+from pystencils.field import Field
+
+with CodeGeneration() as ctx:
+    streaming_pattern = 'aa'
+    target = 'cpu'
+    stencil = get_stencil('D3Q19')
+    dim = len(stencil[0])
+    values_per_cell = len(stencil)
+    collision_rule = create_lb_collision_rule(method='srt', stencil=stencil)
+    pdf_field = Field.create_generic('pdfs', dim, index_shape=(values_per_cell,), layout='fzyx')
+    optimization = {
+        'symbolic_field': pdf_field,
+        'target': target
+    }
+
+    #   Generate PackInfo specifically for streaming pattern
+    generate_lb_pack_info(ctx, 'AccessorBasedPackInfo', stencil, pdf_field,
+                          streaming_pattern=streaming_pattern, target=target, namespace='pystencils')
+
+    #   Generate reference using the alternating pull/push approach
+    update_rule_odd = create_lb_update_rule(collision_rule=collision_rule, optimization=optimization,
+                                            streaming_pattern=streaming_pattern, timestep=Timestep.ODD)
+    generate_pack_info_from_kernel(ctx, 'FromKernelPackInfoPull', update_rule_odd, kind='pull', target=target)
+    generate_pack_info_from_kernel(ctx, 'FromKernelPackInfoPush', update_rule_odd, kind='push', target=target)
diff --git a/tests/lbm/diff_packinfos.sh b/tests/lbm/diff_packinfos.sh
new file mode 100755
index 000000000..bfa89c5ef
--- /dev/null
+++ b/tests/lbm/diff_packinfos.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+REGEX='^((#include)|(void)|(uint_t))'
+cd default_codegen
+diff -u -B <(grep -vP "$REGEX" FromKernelPackInfoPull.cpp)  <(grep -vP "$REGEX" AccessorBasedPackInfoEven.cpp) || exit 1
+diff -u -B <(grep -vP "$REGEX" FromKernelPackInfoPush.cpp)  <(grep -vP "$REGEX" AccessorBasedPackInfoOdd.cpp) || exit 1
diff --git a/tests/timeloop/CMakeLists.txt b/tests/timeloop/CMakeLists.txt
index 265f266fd..862c3b648 100644
--- a/tests/timeloop/CMakeLists.txt
+++ b/tests/timeloop/CMakeLists.txt
@@ -5,5 +5,5 @@
 ###################################################################################################
 
 
-#waLBerla_compile_test( FILES TimeloopAndSweepRegister.cpp DEPENDS field blockforest )
-#waLBerla_execute_test(NAME TimeloopAndSweepRegister )
+waLBerla_compile_test( FILES TimeloopAndSweepRegister.cpp DEPENDS field blockforest )
+waLBerla_execute_test(NAME TimeloopAndSweepRegister )
diff --git a/tests/timeloop/TimeloopAndSweepRegister.cpp b/tests/timeloop/TimeloopAndSweepRegister.cpp
index 028e5fad7..944210821 100644
--- a/tests/timeloop/TimeloopAndSweepRegister.cpp
+++ b/tests/timeloop/TimeloopAndSweepRegister.cpp
@@ -1,175 +1,129 @@
 //======================================================================================================================
 //
-//  This file is part of waLBerla. waLBerla is free software: you can 
+//  This file is part of waLBerla. waLBerla is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
 //! \file TimeloopAndSweepRegister.cpp
 //! \ingroup timeloop
-//! \author Martin Bauer <martin.bauer@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
 //! \brief test cases that test the registering of Sweeps at timeloop
 //
 //======================================================================================================================
 
-#include "timeloop/SweepTimeloop.h"
+#include "blockforest/Initialization.h"
+
 #include "core/DataTypes.h"
+#include "core/Environment.h"
 #include "core/debug/TestSubsystem.h"
-#include "core/logging/Logging.h"
+
+#include "field/Field.h"
+
+#include "timeloop/SweepTimeloop.h"
 
 #include <string>
 #include <vector>
 
-
-using namespace std;
 using namespace walberla;
 
+using Field_T = Field< uint_t, 1 >;
+
+auto FieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
+   return new Field_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block),
+                      storage->getNumberOfZCells(*block), uint_t(0.0), field::fzyx,
+                      make_shared< field::AllocateAligned< uint_t, 64 > >());
+};
 
-class GeneralSweep
+class Sweep1
 {
-   public:
-      GeneralSweep(const string & name, vector<string> & vec)
-         : myName(name), outVec(vec)
-      {}
+ public:
+   Sweep1(BlockDataID fieldID) : fieldID_(fieldID) {}
 
+   void operator()(IBlock* block)
+   {
+      auto field = block->getData< Field_T >(fieldID_);
 
-      void operator() (IBlock * ){
-         outVec.push_back(myName);
-      }
+      for (auto iter = field->begin(); iter != field->end(); ++iter)
+         *iter += 1;
+   }
 
-   private:
-      string myName;
-      vector<string> & outVec;
+ private:
+   BlockDataID fieldID_;
 };
 
-
-class GeneralFunction
+class Sweep2
 {
-   public:
-      GeneralFunction(const string & name, vector<string> & vec)
-      : myName(name), outVec(vec)
-      {}
+ public:
+   Sweep2(BlockDataID fieldID) : fieldID_(fieldID) {}
 
-      void operator() (){
-         outVec.push_back(myName);
-      }
-
-   private:
-      string myName;
-      vector<string> & outVec;
-};
+   void operator()(IBlock* block)
+   {
+      auto field = block->getData< Field_T >(fieldID_);
 
+      for (auto iter = field->begin(); iter != field->end(); ++iter)
+         *iter += 2;
+   }
 
+ private:
+   BlockDataID fieldID_;
+};
 
-int main()
+int main(int argc, char** argv)
 {
    debug::enterTestMode();
+   mpi::Environment env(argc, argv);
 
-   vector<string> expectedSequence;
-   vector<string> sequence;
-
-   SUID cpuSelect("CPU");
-   SUID gpuSelect("GPU");
-
-   SUID srtSelect("SRT");
-   SUID mrtSelect("MRT");
-
-
-   //FIXME put a real test in here
-   shared_ptr<SweepTimeloop> tl = make_shared<SweepTimeloop>(shared_ptr<BlockStorage>(),100);
-
-   typedef SweepTimeloop::SweepHandle SH;
-
-   SH sweep1 =  tl->addSweep(        GeneralSweep("CPU1",sequence), cpuSelect);
-                tl->addSweep(sweep1, GeneralSweep("GPU1",sequence), gpuSelect);
-
-   SH sweep2 =  tl->addSweep(        GeneralSweep("CPU2",sequence), cpuSelect);
-                tl->addSweep(sweep2, GeneralSweep("GPU2",sequence), gpuSelect);
+   std::vector<std::string> expectedSequence;
+   std::vector<std::string> sequence;
 
+   SUID sweepSelect1("Sweep1");
+   SUID sweepSelect2("Sweep2");
 
-   tl->addFuncBeforeSweep(sweep1,GeneralFunction("Pre1",sequence));
-   tl->addFuncAfterSweep (sweep1,GeneralFunction("Post1",sequence));
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid(
+      uint_c(4), uint_c(2), uint_c(2), uint_c(10), uint_c(10), uint_c(10), real_c(1), false, false, false, false);
 
-   tl->addFuncBeforeSweep(sweep2,GeneralFunction("Pre2",sequence));
-   tl->addFuncAfterSweep (sweep2,GeneralFunction("Post2",sequence));
+   BlockDataID fieldID = blocks->addStructuredBlockData< Field_T >(FieldAdder, "Test Field");
 
-   typedef Timeloop::FctHandle FH;
-   FH preTs  = tl->addFuncBeforeTimeStep(       GeneralFunction("PreTimestepCPU",sequence),cpuSelect,srtSelect);
-               tl->addFuncBeforeTimeStep(preTs, GeneralFunction("PreTimestepGPU",sequence),gpuSelect,srtSelect);
-   FH postTs = tl->addFuncAfterTimeStep (       GeneralFunction("PostTimestepCPU",sequence),cpuSelect,mrtSelect);
-               tl->addFuncAfterTimeStep (postTs,GeneralFunction("PostTimestepGPU",sequence),gpuSelect,mrtSelect);
+   for (auto& block : *blocks)
+   {
+      if (block.getAABB().min()[0] < 20)
+         block.setState(sweepSelect1);
+      else
+         block.setState(sweepSelect2);
+   }
 
+   uint_t timesteps = 10;
+   SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
 
-   //----------  First Run - CPU Selector ---------------------------------------------
+   timeloop.add() << Sweep(Sweep1(fieldID), "Sweep 1", sweepSelect1, sweepSelect2);
+   timeloop.add() << Sweep(Sweep2(fieldID), "Sweep 2", sweepSelect2, sweepSelect1);
 
-   expectedSequence.push_back("PreTimestepCPU");
-   expectedSequence.push_back("Pre1");
-   expectedSequence.push_back("CPU1");
-   expectedSequence.push_back("Post1");
-   expectedSequence.push_back("Pre2");
-   expectedSequence.push_back("CPU2");
-   expectedSequence.push_back("Post2");
-   expectedSequence.push_back("PostTimestepCPU");
+   WcTimingPool timingPool;
 
-   tl->singleStep(cpuSelect);
-
-   WALBERLA_CHECK_EQUAL(expectedSequence.size(), sequence.size());
-   WALBERLA_CHECK( equal(expectedSequence.begin(),expectedSequence.end(), sequence.begin() ) );
-
-   expectedSequence.clear();
-   sequence.clear();
-
-
-   // ------------ Second Run - GPU Selector -------------------------------------------
-
-   expectedSequence.push_back("PreTimestepGPU");
-   expectedSequence.push_back("Pre1");
-   expectedSequence.push_back("GPU1");
-   expectedSequence.push_back("Post1");
-   expectedSequence.push_back("Pre2");
-   expectedSequence.push_back("GPU2");
-   expectedSequence.push_back("Post2");
-   expectedSequence.push_back("PostTimestepGPU");
-
-   tl->singleStep(gpuSelect);
-
-   WALBERLA_CHECK_EQUAL(expectedSequence.size(), sequence.size());
-   WALBERLA_CHECK( equal(expectedSequence.begin(),expectedSequence.end(), sequence.begin() ) );
-
-   expectedSequence.clear();
-   sequence.clear();
-
-
-   // ------------ Second Run - GPU and SRT    -------------------------------------------
-
-
-   expectedSequence.push_back("Pre1");
-   expectedSequence.push_back("GPU1");
-   expectedSequence.push_back("Post1");
-   expectedSequence.push_back("Pre2");
-   expectedSequence.push_back("GPU2");
-   expectedSequence.push_back("Post2");
-   expectedSequence.push_back("PostTimestepGPU");
-
-   tl->singleStep(gpuSelect + srtSelect);
-
-   WALBERLA_CHECK_EQUAL(expectedSequence.size(), sequence.size());
-   WALBERLA_CHECK( equal(expectedSequence.begin(),expectedSequence.end(), sequence.begin() ) );
-
-   expectedSequence.clear();
-   sequence.clear();
-
-
-   return 0;
+   timeloop.run(timingPool);
+   for (auto& block : *blocks)
+   {
+      auto field = block.getData< Field_T >(fieldID);
+      if (block.getAABB().min()[0] < 20)
+      {
+         for (auto iter = field->begin(); iter != field->end(); ++iter)
+            WALBERLA_CHECK_EQUAL(*iter, timesteps)
+      }
+      else
+      {
+         for (auto iter = field->begin(); iter != field->end(); ++iter)
+            WALBERLA_CHECK_EQUAL(*iter, timesteps * 2)
+      }
+   }
+   
+   return EXIT_SUCCESS;
 }
-
-
-
-- 
GitLab