diff --git a/.clang-tidy b/.clang-tidy
index 7127535c8e66a377978897492b26cb954201dc20..f0e5933ad55dc18c06f14f2c6ef06dc3226eda22 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -8,6 +8,7 @@ boost-*,
 bugprone-*,
 -bugprone-branch-clone,
 -bugprone-exception-escape,
+-bugprone-easily-swappable-parameters,
 
 misc-*,
 -misc-misplaced-const,
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a924b0fb33ec4aed389d23f8039fc5fbbde170c0..7a9ba191330df560d19193b583226e268feedca7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -413,7 +413,7 @@ gcc_8_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -437,7 +437,7 @@ gcc_8_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -459,7 +459,7 @@ gcc_8_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -480,7 +480,7 @@ gcc_8_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -505,7 +505,7 @@ gcc_8_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -528,7 +528,7 @@ gcc_8_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -550,7 +550,7 @@ gcc_8_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -575,7 +575,7 @@ gcc_9_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -599,7 +599,7 @@ gcc_9_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -621,7 +621,7 @@ gcc_9_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -642,7 +642,7 @@ gcc_9_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -667,7 +667,7 @@ gcc_9_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -690,7 +690,7 @@ gcc_9_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -712,7 +712,7 @@ gcc_9_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -737,7 +737,7 @@ gcc_10_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -761,7 +761,7 @@ gcc_10_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -783,7 +783,7 @@ gcc_10_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -804,7 +804,7 @@ gcc_10_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -829,7 +829,7 @@ gcc_10_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -852,7 +852,7 @@ gcc_10_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -874,7 +874,7 @@ gcc_10_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -899,7 +899,7 @@ gcc_11_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -923,7 +923,7 @@ gcc_11_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -945,7 +945,7 @@ gcc_11_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -966,7 +966,7 @@ gcc_11_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -991,7 +991,7 @@ gcc_11_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1014,7 +1014,7 @@ gcc_11_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1036,7 +1036,7 @@ gcc_11_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1341,7 +1341,7 @@ clang_11.0_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1365,7 +1365,7 @@ clang_11.0_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1387,7 +1387,7 @@ clang_11.0_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1408,7 +1408,7 @@ clang_11.0_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1433,7 +1433,7 @@ clang_11.0_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1456,7 +1456,7 @@ clang_11.0_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1478,7 +1478,7 @@ clang_11.0_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1503,7 +1503,7 @@ clang_12.0_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1527,7 +1527,7 @@ clang_12.0_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1549,7 +1549,7 @@ clang_12.0_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1570,7 +1570,7 @@ clang_12.0_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1595,7 +1595,7 @@ clang_12.0_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1618,7 +1618,7 @@ clang_12.0_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1640,7 +1640,7 @@ clang_12.0_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1665,7 +1665,7 @@ clang_13.0_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1689,7 +1689,7 @@ clang_13.0_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1711,7 +1711,7 @@ clang_13.0_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1729,7 +1729,7 @@ clang_13.0_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1751,7 +1751,7 @@ clang_13.0_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1771,7 +1771,7 @@ clang_13.0_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1791,7 +1791,7 @@ clang_13.0_hybrid_dbg_sp:
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    stage: pretest
    before_script:
-      - pip3 install lbmpy==1.1 jinja2 pytest
+      - pip3 install lbmpy==1.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 429e36ea2c67d660fb5428c5bfa8960a4fce70a1..a0a977795fac8b9f486d7f0cf82e025e2300858f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -603,7 +603,7 @@ endif ()
 ##
 #############################################################################################################################
 if ( WALBERLA_BUILD_WITH_CODEGEN )
-   set(LBMPY_MIN_VERSION 1.1)
+   set(LBMPY_MIN_VERSION 1.2)
    execute_process(COMMAND ${Python_EXECUTABLE} -c "import lbmpy; print(lbmpy.__version__)"
          RESULT_VARIABLE LBMPY_FOUND OUTPUT_VARIABLE LBMPY_VERSION)
     if(NOT LBMPY_FOUND EQUAL 0)
diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 4b95602d6daca9adec9a4932e4f12707f1fb0878..f37d24767eb383e55b1ff2764770ee525bb54c68 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -25,10 +25,12 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
       add_subdirectory( FlowAroundSphereCodeGen )
       add_subdirectory( UniformGridCPU )
       add_subdirectory( PhaseFieldAllenCahn )
+      add_subdirectory( NonUniformGridCPU )
    endif()
 
    if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_GPU_SUPPORT )
       add_subdirectory( UniformGridGPU )
+      add_subdirectory( NonUniformGridGPU )
    endif()
 
 endif()
diff --git a/apps/benchmarks/FlowAroundSphereCodeGen/CMakeLists.txt b/apps/benchmarks/FlowAroundSphereCodeGen/CMakeLists.txt
index 4010341a3d5a4ba93558eae60e95f2fcd292bcbc..40a17bda2180db64d3e7887ae8d195e8e85d7656 100644
--- a/apps/benchmarks/FlowAroundSphereCodeGen/CMakeLists.txt
+++ b/apps/benchmarks/FlowAroundSphereCodeGen/CMakeLists.txt
@@ -15,6 +15,6 @@ if (WALBERLA_BUILD_WITH_CUDA)
         waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
                 DEPENDS blockforest boundary core gpu domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
 else ()
-        waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
-                DEPENDS blockforest boundary core domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
+    waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
+            DEPENDS blockforest boundary core domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
 endif (WALBERLA_BUILD_WITH_CUDA)
diff --git a/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py b/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
index c170a8101422dadce166196b011444f4faf08ccb..7dd9d531b9730e9851e0f8cf53b7b48c4ae930a0 100644
--- a/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
+++ b/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
@@ -47,6 +47,7 @@ with CodeGeneration() as ctx:
                                                    pdfs=pdfs, density=1.0,
                                                    streaming_pattern=streaming_pattern,
                                                    previous_timestep=timesteps[0])
+    setter_assignments = setter_assignments.new_without_unused_subexpressions()
 
     # opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
 
diff --git a/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGenParameters.py b/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGenParameters.py
index 41d38d16218d97a633ccca62c951356b16c2f446..673c10e4d7a2a04117d2cb3a25ab1999d94311bd 100644
--- a/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGenParameters.py
+++ b/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGenParameters.py
@@ -4,10 +4,10 @@ from lbmpy.relaxationrates import relaxation_rate_from_lattice_viscosity
 
 class Scenario:
     def __init__(self):
-        self.timesteps = 1001
+        self.timesteps = 10
         self.vtkWriteFrequency = 100
 
-        self.cells = (384, 128, 128)
+        self.cells = (64, 32, 32)
         self.blocks = (1, 1, 1)
         self.periodic = (0, 0, 0)
 
diff --git a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f332e065fed35fa99367127e9d44b211849cc7b3
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
@@ -0,0 +1,15 @@
+waLBerla_link_files_to_builddir( "*.prm" )
+waLBerla_link_files_to_builddir( "*.py" )
+waLBerla_link_files_to_builddir( "simulation_setup" )
+
+waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated
+        FILE NonUniformGridCPU.py
+        OUT_FILES NonUniformGridCPUStorageSpecification.h NonUniformGridCPUStorageSpecification.cpp
+        NonUniformGridCPUSweepCollection.h NonUniformGridCPUSweepCollection.cpp
+        NoSlip.h NoSlip.cpp
+        UBB.h UBB.cpp
+        NonUniformGridCPUBoundaryCollection.h
+        NonUniformGridCPUInfoHeader.h)
+waLBerla_add_executable( NAME NonUniformGridCPU
+                         FILES NonUniformGridCPU.cpp
+                         DEPENDS blockforest boundary core domain_decomposition field geometry python_coupling timeloop vtk NonUniformGridCPUGenerated )
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a44867523a9b94f8f2f9b57bc8b5aeb6aac6819
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
@@ -0,0 +1,311 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonUniformGridCPU.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+
+#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/refinement/BasicRecursiveTimeStep.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/PythonCallback.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include <cmath>
+
+#include "NonUniformGridCPUInfoHeader.h"
+
+using namespace walberla;
+
+using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
+
+using PdfField_T           = lbm_generated::PdfField< StorageSpecification_T >;
+using FlagField_T          = FlagField< uint8_t >;
+using BoundaryCollection_T = lbm::NonUniformGridCPUBoundaryCollection< FlagField_T >;
+
+using SweepCollection_T = lbm::NonUniformGridCPUSweepCollection;
+
+using blockforest::communication::NonUniformBufferedScheme;
+using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
+
+
+class LDCRefinement
+{
+ private:
+   const uint_t refinementDepth_;
+
+ public:
+   LDCRefinement(const uint_t depth) : refinementDepth_(depth){};
+
+   void operator()(SetupBlockForest& forest)
+   {
+      std::vector< SetupBlock* > blocks;
+      forest.getBlocks(blocks);
+
+      for (auto block : blocks)
+      {
+         if (forest.atDomainYMaxBorder(*block))
+         {
+            if (block->getLevel() < refinementDepth_) { block->setMarker(true); }
+         }
+      }
+   }
+};
+
+class LDC
+{
+ public:
+   LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB"){};
+
+   Vector3< real_t > acceleration() const { return Vector3< real_t >(0.0); }
+   RefinementSelectionFunctor refinementSelector() { return LDCRefinement(refinementDepth_); }
+
+   void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID)
+   {
+      for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
+      {
+         Block& b           = dynamic_cast< Block& >(*bIt);
+         const uint_t level       = b.getLevel();
+         auto flagField     = b.getData< FlagField_T >(flagFieldID);
+         const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
+         const uint8_t ubbFlag    = flagField->registerFlag(ubbFlagUID_);
+         for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
+         {
+            const Cell localCell = cIt.cell();
+            Cell globalCell(localCell);
+            sbfs.transformBlockLocalToGlobalCell(globalCell, b);
+            if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
+            else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 ||
+                     globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
+            {
+               flagField->addFlag(localCell, noslipFlag);
+            }
+         }
+      }
+   }
+ private:
+   const std::string refinementProfile_;
+   const uint_t refinementDepth_;
+
+   const FlagUID noSlipFlagUID_;
+   const FlagUID ubbFlagUID_;
+};
+
+static void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, LDC& ldcSetup, const uint_t numProcesses=uint_c(MPIManager::instance()->numProcesses()))
+{
+   Vector3< real_t > domainSize = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   Vector3< uint_t > rootBlocks = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   Vector3< bool > periodic     = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   auto refSelection = ldcSetup.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function< void(SetupBlockForest&) >(refSelection));
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction( blockforest::uniformWorkloadAndMemoryAssignment );
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalance(true), numProcesses);
+}
+
+int main(int argc, char** argv)
+{
+   const mpi::Environment env(argc, argv);
+   mpi::MPIManager::instance()->useWorldComm();
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      WALBERLA_MPI_WORLD_BARRIER()
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                        SETUP AND CONFIGURATION                                             ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      auto config = *cfg;
+      logging::configureLogging(config);
+      auto domainSetup = config->getOneBlock("DomainSetup");
+
+      // Reading parameters
+      auto parameters              = config->getOneBlock("Parameters");
+      const real_t omega       = parameters.getParameter< real_t >("omega", real_c(1.4));
+      const uint_t refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1));
+      const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+      const bool writeSetupForestAndReturn = parameters.getParameter< bool >("writeSetupForestAndReturn", false);
+      const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false);
+      const uint_t numProcesses = parameters.getParameter< uint_t >( "numProcesses");
+
+      auto ldc = std::make_shared< LDC >(refinementDepth);
+      SetupBlockForest setupBfs;
+      if (writeSetupForestAndReturn)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("Creating SetupBlockForest for " << numProcesses << " processes")
+         WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+         createSetupBlockForest(setupBfs, domainSetup, *ldc, numProcesses);
+
+         WALBERLA_ROOT_SECTION() { setupBfs.writeVTKOutput("SetupBlockForest"); }
+
+         WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+         for (uint_t level = 0; level <= refinementDepth; level++)
+         {
+            const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+            WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+         }
+
+         WALBERLA_LOG_INFO_ON_ROOT("Ending program")
+         return EXIT_SUCCESS;
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+      createSetupBlockForest(setupBfs, domainSetup, *ldc);
+
+      // Create structured block forest
+      Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+      WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
+      auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+      auto blocks =
+         std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
+      blocks->createCellBoundingBoxes();
+
+      WALBERLA_ROOT_SECTION() { vtk::writeDomainDecomposition(blocks, "domainDecomposition", "vtk_out"); }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
+      for (uint_t level = 0; level <= refinementDepth; level++)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
+      }
+
+      // Creating fields
+      const StorageSpecification_T StorageSpec = StorageSpecification_T();
+      const BlockDataID pdfFieldID =
+         lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx);
+      const BlockDataID velFieldID =
+         field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2));
+      const BlockDataID densityFieldID =
+         field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2));
+      const BlockDataID flagFieldID =
+         field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+
+      const Cell innerOuterSplit =
+         Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
+      SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit);
+      for (auto& block : *blocks)
+      {
+         sweepCollection.initialise(&block, 2);
+      }
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      const FlagUID fluidFlagUID("Fluid");
+      ldc->setupBoundaryFlagField(*blocks, flagFieldID);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2);
+      BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldID, fluidFlagUID);
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                           COMMUNICATION SCHEME                                             ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
+      auto communication = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(blocks);
+      auto packInfo      = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, pdfFieldID);
+      communication->addPackInfo(packInfo);
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                          TIME STEP DEFINITIONS                                             ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(
+         blocks, pdfFieldID, sweepCollection, boundaryCollection, communication, packInfo);
+
+      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
+
+      if(benchmarkKernelOnly){
+         timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
+      }
+      else{
+         LBMMeshRefinement.addRefinementToTimeLoop(timeLoop);
+      }
+
+      // VTK
+      const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      if (vtkWriteFrequency > 0)
+      {
+         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                         "simulation_step", false, true, true, false, 0);
+         auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "vel");
+         vtkOutput->addCellDataWriter(velWriter);
+
+         vtkOutput->addBeforeFunction([&]() {
+            for (auto& block : *blocks)
+               sweepCollection.calculateMacroscopicParameters(&block);
+         });
+         timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                               BENCHMARK                                                    ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      auto remainingTimeLoggerFrequency =
+         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
+      if (remainingTimeLoggerFrequency > 0)
+      {
+         auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
+         timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
+      }
+
+      lbm_generated::PerformanceEvaluation<FlagField_T> const performance(blocks, flagFieldID, fluidFlagUID);
+      field::CellCounter< FlagField_T > fluidCells( blocks, flagFieldID, fluidFlagUID );
+      fluidCells();
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Non uniform Grid benchmark with " << fluidCells.numberOfCells() << " fluid cells (in total on all levels)")
+
+      WcTimingPool timeloopTiming;
+      WcTimer simTimer;
+
+      WALBERLA_LOG_INFO_ON_ROOT("Starting benchmark with " << timesteps << " time steps")
+      simTimer.start();
+      timeLoop.run(timeloopTiming);
+      simTimer.end();
+
+      WALBERLA_LOG_INFO_ON_ROOT("Benchmark finished")
+      double time = simTimer.max();
+      WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+      performance.logResultOnRoot(timesteps, time);
+
+      const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+      WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+   }
+   return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b350b6c9c48e0418244101cb3de1daec26c34ce
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
@@ -0,0 +1,68 @@
+import sympy as sp
+
+import pystencils as ps
+
+from lbmpy.advanced_streaming.utility import get_timesteps
+from lbmpy.boundaries import NoSlip, UBB
+from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
+
+from pystencils_walberla import CodeGeneration, generate_info_header
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
+
+omega = sp.symbols("omega")
+omega_free = sp.Symbol("omega_free")
+
+info_header = """
+const char * infoStencil = "{stencil}";
+const char * infoStreamingPattern = "{streaming_pattern}";
+const char * infoCollisionSetup = "{collision_setup}";
+const bool infoCseGlobal = {cse_global};
+const bool infoCsePdfs = {cse_pdfs};
+"""
+
+with CodeGeneration() as ctx:
+    field_type = "float64" if ctx.double_accuracy else "float32"
+
+    streaming_pattern = 'pull'
+    timesteps = get_timesteps(streaming_pattern)
+    stencil = LBStencil(Stencil.D3Q19)
+
+    assert stencil.D == 3, "This application supports only three-dimensional stencils"
+    pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
+    density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
+    macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
+
+    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega,
+                           streaming_pattern=streaming_pattern)
+    lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx")
+
+    method = create_lb_method(lbm_config=lbm_config)
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
+
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip())
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB([0.05, 0, 0], data_type=field_type))
+
+    generate_lbm_package(ctx, name="NonUniformGridCPU",
+                         collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=True, boundaries=[no_slip, ubb],
+                         macroscopic_fields=macroscopic_fields,
+                         target=ps.Target.CPU)
+
+    infoHeaderParams = {
+        'stencil': stencil.name.lower(),
+        'streaming_pattern': streaming_pattern,
+        'collision_setup': lbm_config.method.name.lower(),
+        'cse_global': int(lbm_opt.cse_global),
+        'cse_pdfs': int(lbm_opt.cse_pdfs),
+    }
+
+    field_typedefs = {'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
+
+    generate_info_header(ctx, 'NonUniformGridCPUInfoHeader',
+                         field_typedefs=field_typedefs,
+                         additional_code=info_header.format(**infoHeaderParams))
diff --git a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..1de18d9f0f6ed8ef684eddee74f4712c8f72c852
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
@@ -0,0 +1,57 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    """One benchmark configuration, handed to waLBerla's ScenarioManager.
+
+    The C++ application queries the configuration through the
+    ``@wlb.member_callback``-decorated ``config`` method; the keys below must
+    match the parameter names read in NonUniformGridGPU.cpp / ...CPU.cpp
+    (``DomainSetup`` and ``Parameters`` blocks).
+    """
+
+    def __init__(self, domain_size=(32, 32, 32), root_blocks=(2, 2, 2),
+                 cells_per_block=(16, 16, 16)):
+
+        # Coarse-grid geometry: total domain extent, number of root blocks per
+        # axis, and cells per block (all in lattice cells).
+        self.domain_size = domain_size
+        self.root_blocks = root_blocks
+        self.cells_per_block = cells_per_block
+
+        # Non-periodic in all directions (lid-driven-cavity setup).
+        self.periodic = (0, 0, 0)
+
+        # Cache the dict once without logging it.
+        self.config_dict = self.config(print_dict=False)
+
+    @wlb.member_callback
+    def config(self, print_dict=True):
+        """Return the simulation configuration; optionally log it on root."""
+        from pprint import pformat
+        config_dict = {
+            'DomainSetup': {
+                'domainSize': self.domain_size,
+                'rootBlocks': self.root_blocks,
+                'cellsPerBlock': self.cells_per_block,
+                'periodic': self.periodic
+            },
+            'Parameters': {
+                'omega': 1.95,
+                'timesteps': 101,
+
+                'refinementDepth': 1,
+                'writeSetupForestAndReturn': False,
+                'numProcesses': 1,
+
+                'benchmarkKernelOnly': False,
+
+                # in seconds
+                'remainingTimeLoggerFrequency': 3,
+
+                'vtkWriteFrequency': 50,
+            }
+        }
+
+        if print_dict:
+            wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
+        return config_dict
+
+
+def validation_run():
+    """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
+    wlb.log_info_on_root("Validation run")
+    wlb.log_info_on_root("")
+
+    scenarios = wlb.ScenarioManager()
+    scenario = Scenario()
+    scenarios.add(scenario)
+
+
+validation_run()
diff --git a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d6840007e14d5f5af685bb5b262c8bcfd6138d6e
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Make parameter files, codegen scripts and the simulation setup available in
+# the build directory.
+waLBerla_link_files_to_builddir( "*.prm" )
+waLBerla_link_files_to_builddir( "*.py" )
+waLBerla_link_files_to_builddir( "simulation_setup" )
+
+# Run the pystencils/lbmpy code generation; OUT_FILES must list every header
+# and source emitted by NonUniformGridGPU.py.
+waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated
+        FILE NonUniformGridGPU.py
+        OUT_FILES NonUniformGridGPUStorageSpecification.h NonUniformGridGPUStorageSpecification.${CODEGEN_FILE_SUFFIX}
+        NonUniformGridGPUSweepCollection.h NonUniformGridGPUSweepCollection.${CODEGEN_FILE_SUFFIX}
+        NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX}
+        UBB.h UBB.${CODEGEN_FILE_SUFFIX}
+        NonUniformGridGPUBoundaryCollection.h
+        NonUniformGridGPUInfoHeader.h)
+waLBerla_add_executable( NAME NonUniformGridGPU
+                         FILES NonUniformGridGPU.cpp
+                         DEPENDS blockforest boundary core gpu domain_decomposition field geometry python_coupling timeloop vtk NonUniformGridGPUGenerated )
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa3905b4236295275d82e2e4aad91be4ddcbb5ba
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
@@ -0,0 +1,361 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonUniformGridGPU.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+
+#include "gpu/AddGPUFieldToStorage.h"
+#include "gpu/DeviceSelectMPI.h"
+#include "gpu/FieldCopy.h"
+#include "gpu/ErrorChecking.h"
+#include "gpu/HostFieldAllocator.h"
+#include "gpu/ParallelStreams.h"
+#include "gpu/communication/NonUniformGPUScheme.h"
+
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
+#include "lbm_generated/gpu/GPUPdfField.h"
+#include "lbm_generated/gpu/AddToStorage.h"
+#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
+
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/DictWrapper.h"
+#include "python_coupling/PythonCallback.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include <cmath>
+
+#include "NonUniformGridGPUInfoHeader.h"
+using namespace walberla;
+
+using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
+using Stencil_T = StorageSpecification_T::Stencil;
+using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
+
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
+using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
+using FlagField_T = FlagField< uint8_t >;
+using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >;
+
+using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection;
+
+using gpu::communication::NonUniformGPUScheme;
+using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
+
+/// Refinement selection functor for the lid-driven cavity: refines all blocks
+/// touching the y-max domain border (the moving lid) down to a given depth.
+class LDCRefinement
+{
+ private:
+   const uint_t refinementDepth_;
+
+ public:
+   explicit LDCRefinement(const uint_t depth) : refinementDepth_(depth) {}
+
+   /// Marks lid-adjacent blocks that are still coarser than the requested
+   /// depth; called repeatedly by the SetupBlockForest during init().
+   void operator()(SetupBlockForest& forest) const
+   {
+      std::vector< SetupBlock* > blocks;
+      forest.getBlocks(blocks);
+
+      for (auto block : blocks)
+      {
+         if (forest.atDomainYMaxBorder(*block) && block->getLevel() < refinementDepth_) { block->setMarker(true); }
+      }
+   }
+};
+
+/// Lid-driven-cavity setup: no-slip walls on all domain borders except the
+/// y-max border, which is a moving lid driven by a UBB boundary.
+class LDC
+{
+ private:
+   const uint_t refinementDepth_;
+
+   const FlagUID noSlipFlagUID_;
+   const FlagUID ubbFlagUID_;
+
+ public:
+   explicit LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB") {}
+
+   Vector3< real_t > acceleration() const { return Vector3< real_t >(0.0); }
+
+   /// Refinement criterion used when building the SetupBlockForest.
+   RefinementSelectionFunctor refinementSelector() const
+   {
+      return LDCRefinement(refinementDepth_);
+   }
+
+   /// Registers the NoSlip/UBB flags on every block's flag field and flags
+   /// the ghost cells outside the domain: UBB beyond y-max (the lid),
+   /// NoSlip beyond every other border.
+   void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID) const
+   {
+      for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
+      {
+         Block& b                 = dynamic_cast< Block& >(*bIt);
+         const uint_t level       = b.getLevel();
+         auto flagField           = b.getData< FlagField_T >(flagFieldID);
+         const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
+         const uint8_t ubbFlag    = flagField->registerFlag(ubbFlagUID_);
+         // Iterate including two ghost layers so boundary flags are set in
+         // the ghost cells surrounding the domain.
+         for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
+         {
+            const Cell localCell = cIt.cell();
+            Cell globalCell(localCell);
+            sbfs.transformBlockLocalToGlobalCell(globalCell, b);
+            // Cell counts are level-dependent, so compare in the block's level.
+            if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
+            else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 ||
+                     globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
+            {
+               flagField->addFlag(localCell, noslipFlag);
+            }
+         }
+      }
+   }
+};
+
+/// Builds the refined SetupBlockForest from the "DomainSetup" config block and
+/// balances it for the given number of processes (defaults to the world size).
+static void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, LDC& ldcSetup, const uint_t numProcesses=uint_c(MPIManager::instance()->numProcesses()))
+{
+   const Vector3< real_t > domainExtent  = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   const Vector3< uint_t > numRootBlocks = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   const Vector3< bool > isPeriodic      = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   // Refinement criterion and workload model must be registered before init().
+   auto refSelection = ldcSetup.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function< void(SetupBlockForest&) >(refSelection));
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction( blockforest::uniformWorkloadAndMemoryAssignment );
+
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainExtent[0], domainExtent[1], domainExtent[2]);
+   setupBfs.init(domain, numRootBlocks[0], numRootBlocks[1], numRootBlocks[2], isPeriodic[0], isPeriodic[1], isPeriodic[2]);
+
+   // Distribute the blocks level-wise along a space-filling curve.
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+}
+
+/// Non-uniform grid GPU benchmark (lid-driven cavity with static refinement).
+/// Iterates over all scenarios supplied by the Python configuration callback.
+int main(int argc, char** argv)
+{
+   const mpi::Environment env(argc, argv);
+   mpi::MPIManager::instance()->useWorldComm();
+   gpu::selectDeviceBasedOnMpiRank();
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      WALBERLA_MPI_WORLD_BARRIER()
+
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                        SETUP AND CONFIGURATION                                             ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      auto config = *cfg;
+      logging::configureLogging(config);
+      auto domainSetup              = config->getOneBlock("DomainSetup");
+      Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+      // Reading parameters
+      auto parameters          = config->getOneBlock("Parameters");
+      const real_t omega       = parameters.getParameter< real_t >("omega", real_c(1.4));
+      const uint_t refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1));
+      const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+      const bool cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false);
+      const bool writeSetupForestAndReturn = parameters.getParameter< bool >("writeSetupForestAndReturn", false);
+      const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false);
+      const uint_t numProcesses = parameters.getParameter< uint_t >( "numProcesses");
+
+      auto ldc = std::make_shared< LDC >(refinementDepth);
+      SetupBlockForest setupBfs;
+      // Dry-run mode: only build and report the block structure plus an
+      // estimate of memory demand and cell updates, then exit.
+      if (writeSetupForestAndReturn)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("Creating SetupBlockForest for " << numProcesses << " processes")
+         WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+         createSetupBlockForest(setupBfs, domainSetup, *ldc, numProcesses);
+
+         WALBERLA_ROOT_SECTION() { setupBfs.writeVTKOutput("SetupBlockForest"); }
+
+         WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+         uint_t totalCellUpdates( uint_c(0) );
+         for (uint_t level = 0; level <= refinementDepth; level++)
+         {
+            const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+            const uint_t numberOfCells = numberOfBlocks * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+            // Level l performs 2^l fine-grid steps per coarse time step.
+            totalCellUpdates += timesteps * math::uintPow2(level)  * numberOfCells;
+            WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+         }
+         // NOTE(review): CUDA-specific type/API; if HIP builds are intended,
+         // confirm whether the gpu wrapper provides a portable equivalent.
+         cudaDeviceProp prop;
+         WALBERLA_GPU_CHECK(gpuGetDeviceProperties(&prop, 0))
+
+         const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+
+         // Two PDF arrays are needed unless the storage works in place.
+         const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+         const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+         const uint_t sizePerValue = sizeof(PdfField_T::value_type);
+         const double totalGPUMem = double_c(prop.totalGlobalMem) * 1e-9;
+         const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+
+         WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+         WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+         WALBERLA_LOG_INFO_ON_ROOT( "The total cell updates after " << timesteps << " timesteps (on the coarse level) will be " << totalCellUpdates)
+         WALBERLA_LOG_INFO_ON_ROOT( "Total GPU memory " << totalGPUMem)
+
+         WALBERLA_LOG_INFO_ON_ROOT("Ending program")
+         return EXIT_SUCCESS;
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+      createSetupBlockForest(setupBfs, domainSetup, *ldc);
+
+      // Create structured block forest
+      WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
+      auto bfs    = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+      auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
+      blocks->createCellBoundingBoxes();
+
+      WALBERLA_ROOT_SECTION() { vtk::writeDomainDecomposition(blocks, "domainDecomposition", "vtk_out"); }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
+      for (uint_t level = 0; level <= refinementDepth; level++)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Start field allocation")
+      // Creating fields: host fields use a pinned-memory allocator so that
+      // host<->device copies can be asynchronous; all fields carry two ghost layers.
+      const StorageSpecification_T StorageSpec = StorageSpecification_T();
+      auto allocator = make_shared< gpu::HostFieldAllocator<real_t> >();
+      const BlockDataID pdfFieldCpuID  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
+      const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
+      const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
+      const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+
+      const BlockDataID pdfFieldGpuID = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
+      const BlockDataID velFieldGpuID =
+         gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
+      // Fixed copy-paste error: the density GPU field was registered as "velocity on GPU".
+      const BlockDataID densityFieldGpuID =
+         gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "density on GPU", true);
+      WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation")
+
+      const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
+      Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
+      SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
+      for (auto& iBlock : *blocks)
+      {
+         sweepCollection.initialise(&iBlock, 2, nullptr);
+      }
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      const FlagUID fluidFlagUID("Fluid");
+      ldc->setupBoundaryFlagField(*blocks, flagFieldID);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2);
+      BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                           COMMUNICATION SCHEME                                             ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
+      auto communication = std::make_shared< NonUniformGPUScheme <CommunicationStencil_T>> (blocks, cudaEnabledMPI);
+      auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication<GPUPdfField_T>(blocks, pdfFieldGpuID);
+      communication->addPackInfo(packInfo);
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                          TIME STEP DEFINITIONS                                             ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      int streamHighPriority = 0;
+      int streamLowPriority  = 0;
+      WALBERLA_GPU_CHECK(gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))
+      sweepCollection.setOuterPriority(streamHighPriority);
+      auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
+
+      lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
+      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
+
+      // Either benchmark the pure kernel performance or the full refinement time step.
+      if(benchmarkKernelOnly){
+         timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
+      }
+      else{
+         LBMMeshRefinement.addRefinementToTimeLoop(timeLoop);
+      }
+
+      // VTK
+      const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      if (vtkWriteFrequency > 0)
+      {
+         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                         "simulation_step", false, true, true, false, 0);
+         auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldCpuID, "vel");
+         vtkOutput->addCellDataWriter(velWriter);
+
+         // Update the macroscopic fields on the device and copy the velocity
+         // back to the host before each VTK write.
+         vtkOutput->addBeforeFunction([&]() {
+            for (auto& block : *blocks)
+               sweepCollection.calculateMacroscopicParameters(&block);
+            gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
+         });
+         timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }
+
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      ///                                               BENCHMARK                                                    ///
+      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      auto remainingTimeLoggerFrequency =
+         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
+      if (remainingTimeLoggerFrequency > 0)
+      {
+         auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
+         timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
+      }
+
+      lbm_generated::PerformanceEvaluation<FlagField_T> const performance(blocks, flagFieldID, fluidFlagUID);
+      field::CellCounter< FlagField_T > fluidCells( blocks, flagFieldID, fluidFlagUID );
+      fluidCells();
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Non uniform Grid benchmark with " << fluidCells.numberOfCells() << " fluid cells (in total on all levels)")
+
+      WcTimingPool timeloopTiming;
+      WcTimer simTimer;
+
+      // Synchronize before timing so pending GPU work is not attributed to the run.
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+      simTimer.start();
+      timeLoop.run(timeloopTiming);
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+      simTimer.end();
+
+      WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+      // Report the slowest process: the benchmark is bounded by the slowest rank.
+      double time = simTimer.max();
+      WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+      performance.logResultOnRoot(timesteps, time);
+
+      const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+      WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+   }
+   return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
new file mode 100644
index 0000000000000000000000000000000000000000..d523b5c0c1b8dfcbfa1cf112c0342edfdee03c7d
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
@@ -0,0 +1,79 @@
+import sympy as sp
+import numpy as np
+
+import pystencils as ps
+from pystencils.typing import TypedSymbol
+
+from lbmpy.advanced_streaming.utility import get_timesteps
+from lbmpy.boundaries import NoSlip, UBB
+from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
+
+from pystencils_walberla import CodeGeneration, generate_info_header
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
+
+omega = sp.symbols("omega")
+omega_free = sp.Symbol("omega_free")
+compile_time_block_size = False
+max_threads = 256
+
+sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
+                    TypedSymbol("cudaBlockSize1", np.int32),
+                    TypedSymbol("cudaBlockSize2", np.int32))
+
+gpu_indexing_params = {'block_size': sweep_block_size}
+
+info_header = """
+const char * infoStencil = "{stencil}";
+const char * infoStreamingPattern = "{streaming_pattern}";
+const char * infoCollisionSetup = "{collision_setup}";
+const bool infoCseGlobal = {cse_global};
+const bool infoCsePdfs = {cse_pdfs};
+"""
+
+with CodeGeneration() as ctx:
+    field_type = "float64" if ctx.double_accuracy else "float32"
+
+    streaming_pattern = 'pull'
+    timesteps = get_timesteps(streaming_pattern)
+    stencil = LBStencil(Stencil.D3Q19)
+
+    assert stencil.D == 3, "This application supports only three-dimensional stencils"
+    pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
+    density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
+    macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
+
+    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega,
+                           streaming_pattern=streaming_pattern)
+    lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
+
+    method = create_lb_method(lbm_config=lbm_config)
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
+
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip())
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB([0.05, 0, 0], data_type=field_type))
+
+    generate_lbm_package(ctx, name="NonUniformGridGPU",
+                         collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=True, boundaries=[no_slip, ubb],
+                         macroscopic_fields=macroscopic_fields,
+                         target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
+                         max_threads=max_threads)
+
+    infoHeaderParams = {
+        'stencil': stencil.name.lower(),
+        'streaming_pattern': streaming_pattern,
+        'collision_setup': lbm_config.method.name.lower(),
+        'cse_global': int(lbm_opt.cse_global),
+        'cse_pdfs': int(lbm_opt.cse_pdfs),
+    }
+
+    field_typedefs = {'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
+
+    generate_info_header(ctx, 'NonUniformGridGPUInfoHeader',
+                         field_typedefs=field_typedefs,
+                         additional_code=info_header.format(**infoHeaderParams))
diff --git a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
new file mode 100644
index 0000000000000000000000000000000000000000..d05852fd1934c71ea67d6cce3a8ae3f4cc80e61a
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
@@ -0,0 +1,66 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self, domain_size=(64, 64, 64), root_blocks=(2, 2, 2),
+                 cells_per_block=(32, 32, 32), refinement_depth=0):
+
+        self.domain_size = domain_size
+        self.root_blocks = root_blocks
+        self.cells_per_block = cells_per_block
+        self.refinement_depth = refinement_depth
+
+        self.periodic = (0, 0, 0)
+
+        self.config_dict = self.config(print_dict=False)
+
+    @wlb.member_callback
+    def config(self, print_dict=True):
+        from pprint import pformat
+        config_dict = {
+            'DomainSetup': {
+                'domainSize': self.domain_size,
+                'rootBlocks': self.root_blocks,
+                'cellsPerBlock': self.cells_per_block,
+                'periodic': self.periodic
+            },
+            'Parameters': {
+                'omega': 1.95,
+                'timesteps': 1501,
+
+                'refinementDepth': self.refinement_depth,
+                'writeSetupForestAndReturn': False,
+                'numProcesses': 1,
+
+                'cudaEnabledMPI': False,
+                'benchmarkKernelOnly': False,
+
+                'remainingTimeLoggerFrequency': 3,
+
+                'vtkWriteFrequency': 500,
+            }
+        }
+
+        if print_dict and config_dict["Parameters"]["writeSetupForestAndReturn"] is False:
+            wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
+        return config_dict
+
+
+def validation_run():
+    """Run a single refined scenario (refinement_depth=1, non-periodic domain) to verify the benchmark executes correctly"""
+    wlb.log_info_on_root("Validation run")
+
+    domain_size = (64, 64, 64)
+    cells_per_block = (32, 32, 32)
+
+    root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
+
+    scenarios = wlb.ScenarioManager()
+    scenario = Scenario(domain_size=domain_size,
+                        root_blocks=root_blocks,
+                        cells_per_block=cells_per_block,
+                        refinement_depth=1)
+    scenarios.add(scenario)
+
+
+validation_run()
diff --git a/apps/benchmarks/UniformGridCPU/CMakeLists.txt b/apps/benchmarks/UniformGridCPU/CMakeLists.txt
index a2f06826e40553f9c157c5b5e5200ba8ed2b26b2..0d159bc542c6ada48999dace8e2b7dce4a085519 100644
--- a/apps/benchmarks/UniformGridCPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridCPU/CMakeLists.txt
@@ -15,13 +15,11 @@ foreach(streaming_pattern pull push aa esotwist)
             waLBerla_generate_target_from_python(NAME UniformGridCPUGenerated_${config}
                     FILE UniformGridCPU.py
                     CODEGEN_CFG ${config}
-                    OUT_FILES   UniformGridCPU_LbKernel.cpp UniformGridCPU_LbKernel.h
-                    UniformGridCPU_PackInfoEven.cpp UniformGridCPU_PackInfoEven.h
-                    UniformGridCPU_PackInfoOdd.cpp UniformGridCPU_PackInfoOdd.h
-                    UniformGridCPU_NoSlip.cpp UniformGridCPU_NoSlip.h
-                    UniformGridCPU_UBB.cpp UniformGridCPU_UBB.h
-                    UniformGridCPU_MacroSetter.cpp UniformGridCPU_MacroSetter.h
-                    UniformGridCPU_MacroGetter.cpp UniformGridCPU_MacroGetter.h
+                    OUT_FILES UniformGridCPUStorageSpecification.h UniformGridCPUStorageSpecification.cpp
+                    UniformGridCPUSweepCollection.h UniformGridCPUSweepCollection.cpp
+                    NoSlip.cpp NoSlip.h
+                    UBB.cpp UBB.h
+                    UniformGridCPUBoundaryCollection.h
                     UniformGridCPU_StreamOnlyKernel.cpp UniformGridCPU_StreamOnlyKernel.h
                     UniformGridCPU_InfoHeader.h
                     )
diff --git a/apps/benchmarks/UniformGridCPU/InitShearVelocity.h b/apps/benchmarks/UniformGridCPU/InitShearVelocity.h
index 9a6c7d1db63a5a7ad53376ed33f56851a806dafe..fd13a03b6d89ed30007969ecb2b77f27d015d8a6 100644
--- a/apps/benchmarks/UniformGridCPU/InitShearVelocity.h
+++ b/apps/benchmarks/UniformGridCPU/InitShearVelocity.h
@@ -16,7 +16,7 @@ inline void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks,
         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velField,
                                                          Cell globalCell;
         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
-        real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
+        const real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
         velField->get(x, y, z, 1) = real_t(0);
         velField->get(x, y, z, 2) = randomReal;
 
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
index 3b4a77a570ad86d2adc95789f0a58cda3a3dd4e9..64d94ce3d0dd843b29e693d446485bac73b84119 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
@@ -34,7 +34,10 @@
 
 #include "geometry/InitBoundaryHandling.h"
 
-#include "lbm/communication/CombinedInPlaceCpuPackInfo.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
 
 #include "python_coupling/CreateConfig.h"
 #include "python_coupling/DictWrapper.h"
@@ -50,21 +53,20 @@
 
 using namespace walberla;
 
-using PackInfoEven_T = lbm::UniformGridCPU_PackInfoEven;
-using PackInfoOdd_T = lbm::UniformGridCPU_PackInfoOdd;
-using LbSweep = lbm::UniformGridCPU_LbKernel;
+using StorageSpecification_T = lbm::UniformGridCPUStorageSpecification;
+using Stencil_T = lbm::UniformGridCPUStorageSpecification::Stencil;
 
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
 using FlagField_T = FlagField< uint8_t >;
+using BoundaryCollection_T = lbm::UniformGridCPUBoundaryCollection< FlagField_T >;
 
-auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
-   return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block),
-                         storage->getNumberOfZCells(*block), uint_t(1), field::fzyx,
-                         make_shared< field::AllocateAligned< real_t, 64 > >());
-};
+using SweepCollection_T = lbm::UniformGridCPUSweepCollection;
+
+using blockforest::communication::UniformBufferedScheme;
 
 int main(int argc, char** argv)
 {
-   mpi::Environment const env(argc, argv);
+   const mpi::Environment env(argc, argv);
 
    for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
    {
@@ -74,8 +76,6 @@ int main(int argc, char** argv)
       logging::configureLogging(config);
       auto blocks = blockforest::createUniformBlockGridFromConfig(config);
 
-      Vector3< uint_t > cellsPerBlock =
-         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
       // Reading parameters
       auto parameters          = config->getOneBlock("Parameters");
       const real_t omega       = parameters.getParameter< real_t >("omega", real_c(1.4));
@@ -83,9 +83,12 @@ int main(int argc, char** argv)
       const bool initShearFlow = parameters.getParameter< bool >("initShearFlow", true);
 
       // Creating fields
-      BlockDataID pdfFieldId = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "pdfs");
-      BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
-      BlockDataID const densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx);
+      const StorageSpecification_T StorageSpec = StorageSpecification_T();
+      auto fieldAllocator = make_shared< field::AllocateAligned< real_t, 64 > >();
+      const BlockDataID pdfFieldId  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, field::fzyx, fieldAllocator);
+      const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
+      const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx);
+      const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
 
       // Initialize velocity on cpu
       if (initShearFlow)
@@ -94,157 +97,76 @@ int main(int argc, char** argv)
          initShearVelocity(blocks, velFieldId);
       }
 
-      pystencils::UniformGridCPU_MacroSetter setterSweep(densityFieldId, pdfFieldId, velFieldId);
-      pystencils::UniformGridCPU_MacroGetter getterSweep(densityFieldId, pdfFieldId, velFieldId);
+      const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
+      SweepCollection_T sweepCollection(blocks, pdfFieldId, densityFieldId, velFieldId, omega, innerOuterSplit);
 
-      // Set up initial PDF values
       for (auto& block : *blocks)
-         setterSweep(&block);
-
-      Vector3< int > innerOuterSplit =
-         parameters.getParameter< Vector3< int > >("innerOuterSplit", Vector3< int >(1, 1, 1));
-
-      for (uint_t i = 0; i < 3; ++i)
       {
-         if (int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2)
-         {
-            WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
-         }
+         sweepCollection.initialise(&block);
       }
-      Cell const innerOuterSplitCell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]);
 
-      LbSweep lbSweep(pdfFieldId, omega, innerOuterSplitCell);
-      pystencils::UniformGridCPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldId);
+      const pystencils::UniformGridCPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldId);
 
       // Boundaries
       const FlagUID fluidFlagUID("Fluid");
-      BlockDataID const flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
       auto boundariesConfig   = config->getBlock("Boundaries");
-      bool boundaries         = false;
       if (boundariesConfig)
       {
          WALBERLA_LOG_INFO_ON_ROOT("Setting boundary conditions")
-         boundaries = true;
          geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
-         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
       }
-
-      lbm::UniformGridCPU_NoSlip noSlip(blocks, pdfFieldId);
-      noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);
-
-      lbm::UniformGridCPU_UBB ubb(blocks, pdfFieldId);
-      ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
+      BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldId, fluidFlagUID);
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                           COMMUNICATION SCHEME                                             ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-      // Initial setup is the post-collision state of an even time step
-      auto tracker = make_shared< lbm::TimestepTracker >(0);
-      auto packInfo =
-         make_shared< lbm::CombinedInPlaceCpuPackInfo< PackInfoEven_T , PackInfoOdd_T > >(tracker, pdfFieldId);
-
-      blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
+      auto packInfo = std::make_shared<lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T >>(pdfFieldId);
+      UniformBufferedScheme< Stencil_T > communication(blocks);
       communication.addPackInfo(packInfo);
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                          TIME STEP DEFINITIONS                                             ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
+      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
 
-      auto boundarySweep = [&](IBlock* block, uint8_t t) {
-         noSlip.run(block, t);
-         ubb.run(block, t);
-      };
-
-      auto boundaryInner = [&](IBlock* block, uint8_t t) {
-         noSlip.inner(block, t);
-         ubb.inner(block, t);
-      };
-
-      auto boundaryOuter = [&](IBlock* block, uint8_t t) {
-         noSlip.outer(block, t);
-         ubb.outer(block, t);
-      };
-
-      auto simpleOverlapTimeStep = [&]() {
-         // Communicate post-collision values of previous timestep...
-         communication.startCommunication();
-         for (auto& block : *blocks)
-         {
-            if (boundaries) boundaryInner(&block, tracker->getCounter());
-            lbSweep.inner(&block, tracker->getCounterPlusOne());
-         }
-         communication.wait();
-         for (auto& block : *blocks)
-         {
-            if (boundaries) boundaryOuter(&block, tracker->getCounter());
-            lbSweep.outer(&block, tracker->getCounterPlusOne());
-         }
-
-         tracker->advance();
-      };
-
-      auto normalTimeStep = [&]() {
-         communication.communicate();
-         for (auto& block : *blocks)
-         {
-            if (boundaries) boundarySweep(&block, tracker->getCounter());
-            lbSweep(&block, tracker->getCounterPlusOne());
-         }
-
-         tracker->advance();
-      };
-
-      // With two-fields patterns, ghost layer cells act as constant stream-in boundaries;
-      // with in-place patterns, ghost layer cells act as wet-node no-slip boundaries.
-      auto kernelOnlyFunc = [&]() {
-         tracker->advance();
-         for (auto& block : *blocks)
-            lbSweep(&block, tracker->getCounter());
-      };
-
-      // Stream only function to test a streaming pattern without executing lbm operations inside
-      auto StreamOnlyFunc = [&]() {
-         for (auto& block : *blocks)
-            StreamOnlyKernel(&block);
-      };
+      if (timeStepStrategy == "noOverlap") {
+         if (boundariesConfig){
+            timeLoop.add() << BeforeFunction(communication, "communication")
+                           << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL), "Boundary Conditions");
+            timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
+         }else {
+            timeLoop.add() << BeforeFunction(communication, "communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");}
+
+      } else if (timeStepStrategy == "simpleOverlap") {
+         if (boundariesConfig){
+            timeLoop.add() << BeforeFunction(communication.getStartCommunicateFunctor(), "Start Communication")
+                           << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL), "Boundary Conditions");
+            timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::INNER), "LBM StreamCollide Inner Frame");
+            timeLoop.add() << BeforeFunction(communication.getWaitFunctor(), "Wait for Communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::OUTER), "LBM StreamCollide Outer Frame");
+         }else{
+            timeLoop.add() << BeforeFunction(communication.getStartCommunicateFunctor(), "Start Communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::INNER), "LBM StreamCollide Inner Frame");
+            timeLoop.add() << BeforeFunction(communication.getWaitFunctor(), "Wait for Communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::OUTER), "LBM StreamCollide Outer Frame");}
+
+      } else if (timeStepStrategy == "kernelOnly") {
+         timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
+      } else if (timeStepStrategy == "StreamOnly") {
+         timeLoop.add() << Sweep(StreamOnlyKernel, "LBM Stream Only");
+      } else {
+         WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'")
+      }
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                             TIME LOOP SETUP                                                ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
-
-      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
-      std::function< void() > timeStep;
-      if (timeStepStrategy == "noOverlap")
-         timeStep = std::function< void() >(normalTimeStep);
-      else if (timeStepStrategy == "simpleOverlap")
-         timeStep = simpleOverlapTimeStep;
-      else if (timeStepStrategy == "kernelOnly")
-      {
-         WALBERLA_LOG_INFO_ON_ROOT(
-            "Running only compute kernel without boundary - this makes only sense for benchmarking!")
-         // Run initial communication once to provide any missing stream-in populations
-         communication.communicate();
-         timeStep = kernelOnlyFunc;
-      }
-      else if (timeStepStrategy == "StreamOnly")
-      {
-         WALBERLA_LOG_INFO_ON_ROOT(
-            "Running only streaming kernel without LBM - this makes only sense for benchmarking!")
-         // Run initial communication once to provide any missing stream-in populations
-         timeStep = StreamOnlyFunc;
-      }
-      else
-      {
-         WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', "
-                                      "'simpleOverlap', 'kernelOnly'")
-      }
-
-      timeLoop.add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
-
-      uint_t const vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
       if (vtkWriteFrequency > 0)
       {
          auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
@@ -254,7 +176,7 @@ int main(int argc, char** argv)
 
          vtkOutput->addBeforeFunction([&]() {
            for (auto& block : *blocks){
-              getterSweep(&block);}
+              sweepCollection.calculateMacroscopicParameters(&block);}
          });
 
          timeLoop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
@@ -263,46 +185,50 @@ int main(int argc, char** argv)
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                               BENCHMARK                                                    ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      lbm_generated::PerformanceEvaluation<FlagField_T> const performance(blocks, flagFieldID, fluidFlagUID);
 
-      int const warmupSteps     = parameters.getParameter< int >("warmupSteps", 2);
-      int const outerIterations = parameters.getParameter< int >("outerIterations", 1);
-      for (int i = 0; i < warmupSteps; ++i)
+      const uint_t warmupSteps     = parameters.getParameter< uint_t >("warmupSteps", uint_c(2));
+      const uint_t outerIterations = parameters.getParameter< uint_t >("outerIterations", uint_c(1));
+      for (uint_t i = 0; i < warmupSteps; ++i)
          timeLoop.singleStep();
 
-      real_t const remainingTimeLoggerFrequency =
+      auto remainingTimeLoggerFrequency =
          parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
       if (remainingTimeLoggerFrequency > 0)
       {
-         auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps() * uint_c(outerIterations),
+         auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps() * outerIterations,
                                                    remainingTimeLoggerFrequency);
          timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
       }
 
-      for (int outerIteration = 0; outerIteration < outerIterations; ++outerIteration)
+      for (uint_t outerIteration = 0; outerIteration < outerIterations; ++outerIteration)
       {
          timeLoop.setCurrentTimeStepToZero();
+
+         WcTimingPool timeloopTiming;
          WcTimer simTimer;
+
+         WALBERLA_MPI_WORLD_BARRIER()
          WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+
          simTimer.start();
-         timeLoop.run();
+         timeLoop.run(timeloopTiming);
          simTimer.end();
+
          WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
-         auto time      = real_c(simTimer.last());
-         WALBERLA_MPI_SECTION()
-         {
-            walberla::mpi::reduceInplace(time, walberla::mpi::MAX);
-         }
-         auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+         double time = simTimer.max();
+         WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+         performance.logResultOnRoot(timesteps, time);
+
+         const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+         WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
 
-         auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
-         WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
-         WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
          WALBERLA_ROOT_SECTION()
          {
             python_coupling::PythonCallback pythonCallbackResults("results_callback");
             if (pythonCallbackResults.isCallable())
             {
-               pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
+               pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
                pythonCallbackResults.data().exposeValue("stencil", infoStencil);
                pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
                pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
index cba55fac4675c18d8f25f10541de2138002b1208..cd1a36114788a0ad440f89d750abc8af26109eda 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
@@ -6,19 +6,17 @@ import pystencils as ps
 from pystencils.simp.subexpression_insertion import insert_zeros, insert_aliases, insert_constants,\
     insert_symbol_times_minus_one
 
-from lbmpy.advanced_streaming import Timestep, is_inplace
+from lbmpy.advanced_streaming import is_inplace
 from lbmpy.advanced_streaming.utility import streaming_patterns
 from lbmpy.boundaries import NoSlip, UBB
 from lbmpy.creationfunctions import LBMConfig, LBMOptimisation, LBStencil, create_lb_collision_rule
 from lbmpy.enums import Method, Stencil
 from lbmpy.fieldaccess import CollideOnlyInplaceAccessor
-from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
+from lbmpy.moments import get_default_moment_set_for_stencil
 from lbmpy.updatekernels import create_stream_only_kernel
 
-from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep,\
-    generate_mpidtype_info_from_kernel, generate_info_header
-
-from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_lbm_boundary, generate_lb_pack_info
+from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
 
 omega = sp.symbols('omega')
 omega_free = sp.Symbol('omega_free')
@@ -121,15 +119,17 @@ with CodeGeneration() as ctx:
 
     options = options_dict[collision_setup]
 
-    q = stencil.Q
-    dim = stencil.D
-    assert dim == 3, "This app supports only three-dimensional stencils"
-    pdfs, pdfs_tmp = ps.fields(f"pdfs({q}), pdfs_tmp({q}): {field_type}[3D]", layout='fzyx')
+    assert stencil.D == 3, "This application supports only three-dimensional stencils"
+    pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
     density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
+    macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
 
     lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
     lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
 
+    if lbm_config.method == Method.CENTRAL_MOMENT:
+        lbm_config = replace(lbm_config, nested_moments=get_default_moment_set_for_stencil(stencil))
+
     if not is_inplace(streaming_pattern):
         lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
         field_swaps = [(pdfs, pdfs_tmp)]
@@ -153,46 +153,22 @@ with CodeGeneration() as ctx:
         collision_rule = insert_aliases(collision_rule)
         collision_rule = insert_symbol_times_minus_one(collision_rule)
 
-    lb_method = collision_rule.method
-
-    generate_alternating_lbm_sweep(ctx, 'UniformGridCPU_LbKernel', collision_rule, lbm_config=lbm_config,
-                                   lbm_optimisation=lbm_opt, target=ps.Target.CPU,
-                                   inner_outer_split=True, field_swaps=field_swaps,
-                                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
-    
-    # getter & setter
-    setter_assignments = macroscopic_values_setter(lb_method,
-                                                   density=density_field.center, velocity=velocity_field.center_vector,
-                                                   pdfs=pdfs,
-                                                   streaming_pattern=streaming_pattern,
-                                                   previous_timestep=Timestep.EVEN)
-    getter_assignments = macroscopic_values_getter(lb_method,
-                                                   density=density_field, velocity=velocity_field,
-                                                   pdfs=pdfs,
-                                                   streaming_pattern=streaming_pattern,
-                                                   previous_timestep=Timestep.EVEN)
-
-    generate_sweep(ctx, 'UniformGridCPU_MacroSetter', setter_assignments, target=ps.Target.CPU, cpu_openmp=openmp)
-    generate_sweep(ctx, 'UniformGridCPU_MacroGetter', getter_assignments, target=ps.Target.CPU, cpu_openmp=openmp)
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip())
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB([0.05, 0, 0], data_type=field_type))
+
+    generate_lbm_package(ctx, name="UniformGridCPU",
+                         collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=False, boundaries=[no_slip, ubb],
+                         macroscopic_fields=macroscopic_fields,
+                         cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
 
     # Stream only kernel
     generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
                    target=ps.Target.CPU, cpu_openmp=openmp)
 
-    # Boundaries
-    noslip = NoSlip()
-    ubb = UBB((0.05, 0, 0), data_type=field_type)
-
-    generate_alternating_lbm_boundary(ctx, 'UniformGridCPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
-                                      streaming_pattern=streaming_pattern, target=ps.Target.CPU, cpu_openmp=openmp)
-    generate_alternating_lbm_boundary(ctx, 'UniformGridCPU_UBB', ubb, lb_method, field_name=pdfs.name,
-                                      streaming_pattern=streaming_pattern, target=ps.Target.CPU, cpu_openmp=openmp)
-
-    # communication
-    generate_lb_pack_info(ctx, 'UniformGridCPU_PackInfo', stencil, pdfs,
-                          streaming_pattern=streaming_pattern, target=ps.Target.CPU,
-                          always_generate_separate_classes=True)
-
     infoHeaderParams = {
         'stencil': stencil_str,
         'streaming_pattern': streaming_pattern,
@@ -201,13 +177,10 @@ with CodeGeneration() as ctx:
         'cse_pdfs': int(lbm_opt.cse_pdfs),
     }
 
-    stencil_typedefs = {'Stencil_T': stencil,
-                        'CommunicationStencil_T': stencil}
-    field_typedefs = {'PdfField_T': pdfs,
-                      'VelocityField_T': velocity_field,
+    field_typedefs = {'VelocityField_T': velocity_field,
                       'ScalarField_T': density_field}
 
     # Info header containing correct template definitions for stencil and field
     generate_info_header(ctx, 'UniformGridCPU_InfoHeader',
-                         stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
+                         field_typedefs=field_typedefs,
                          additional_code=info_header.format(**infoHeaderParams))
diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py b/apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c4aa08ec2c2328be7d102d4f377a2cd754dc8af
--- /dev/null
+++ b/apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+import os
+from waLBerla.tools.config import block_decomposition
+
+
+job_script_header = """
+#!/bin/bash -l
+#SBATCH --job-name=scaling
+#SBATCH --time=01:00:00
+#SBATCH --nodes={nodes}
+#SBATCH -o out_scaling_{nodes}_%j.txt
+#SBATCH -e err_scaling_{nodes}_%j.txt
+#SBATCH --ntasks-per-core=1
+#SBATCH --cpus-per-task=1
+#SBATCH --partition=normal
+#SBATCH --constraint=gpu
+#SBATCH --account=s1042
+
+source ~/env.sh
+
+export MPICH_RDMA_ENABLED_CUDA=1  # allow GPU-GPU data transfer
+export CRAY_CUDA_MPS=1            # allow GPU sharing
+export MPICH_G2G_PIPELINE=256     # adapt maximum number of concurrent in-flight messages
+
+export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
+export CRAY_CUDA_MPS=1
+
+export MPICH_RANK_REORDER_METHOD=3
+export PMI_MMAP_SYNC_WAIT_TIME=300
+
+cd {folder}
+# grid_order -R -H -c 1,1,8 -g 16,16,8
+
+ulimit -c 0
+"""
+
+job_script_exe_part = """
+
+export WALBERLA_SCENARIO_IDX=0
+while srun -n {nodes} ./{app} {config}
+do
+ ((WALBERLA_SCENARIO_IDX++))
+done
+"""
+
+streaming_patterns = ['pull', 'push', 'aa', 'esotwist']
+stencils = ['d3q27', 'd3q19']
+methods = ['srt', 'mrt', 'cumulant', 'entropic']
+
+all_executables = []
+
+for stencil in stencils:
+    for streaming_pattern in streaming_patterns:
+        for method in methods:
+            all_executables.append(f"UniformGridGPU_{stencil}_{streaming_pattern}_{method}")
+
+all_executables = tuple(all_executables)
+
+
+def generate_jobscripts(exe_names=all_executables):
+    for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]:
+        folder_name = "scaling_{:04d}".format(node_count)
+        os.makedirs(folder_name, exist_ok=True)
+
+        # run grid_order
+        import subprocess
+        decomposition = block_decomposition(node_count)
+        decomposition_str = ",".join(str(e) for e in decomposition)
+        subprocess.check_call(['grid_order', '-R', '-H', '-g', decomposition_str])
+
+        job_script = job_script_header.format(nodes=node_count, folder=os.path.join(os.getcwd(), folder_name))
+        for exe in exe_names:
+            job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count,
+                                                     config='../communication_compare.py')
+
+        with open(os.path.join(folder_name, 'job.sh'), 'w') as f:
+            f.write(job_script)
+
+
+if __name__ == '__main__':
+    print("Called without waLBerla - generating job scripts for PizDaint")
+    generate_jobscripts()
diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
index f432e778bc8e7d5c82120db40469ed7d2f2aa7ed..9acab66da85c8f5477251e66bc7a9ea37ccc2fd7 100755
--- a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
@@ -9,13 +9,15 @@ from math import prod
 # Number of time steps run for a workload of 128^3 per process
 # if double as many cells are on the process, half as many time steps are run etc.
 # increase this to get more reliable measurements
-TIME_STEPS_FOR_128_BLOCK = 5
+TIME_STEPS_FOR_128_BLOCK = 10
 DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
 
 
 def num_time_steps(block_size, time_steps_for_128_block=TIME_STEPS_FOR_128_BLOCK):
     cells = block_size[0] * block_size[1] * block_size[2]
     time_steps = (128 ** 3 / cells) * time_steps_for_128_block
+    if time_steps < TIME_STEPS_FOR_128_BLOCK:
+        time_steps = 5
     return int(time_steps)
 
 
@@ -39,7 +41,7 @@ class Scenario:
             init_shear_flow = False
             periodic = (0, 0, 0)
 
-        self.blocks = block_decomposition(wlb.mpi.numProcesses())
+        self.blocks = block_decomposition(wlb.mpi.numProcesses())
 
         self.cells_per_block = cells_per_block
         self.periodic = periodic
@@ -66,6 +68,7 @@ class Scenario:
                 'blocks': self.blocks,
                 'cellsPerBlock': self.cells_per_block,
                 'periodic': self.periodic,
+                'oneBlockPerProcess': False
             },
             'Parameters': {
                 'omega': self.omega,
@@ -176,6 +179,7 @@ def single_node_benchmark():
     for block_size in block_sizes:
         scenario = Scenario(cells_per_block=block_size,
                             time_step_strategy='kernelOnly',
+                            outer_iterations=1,
                             timesteps=num_time_steps(block_size))
         scenarios.add(scenario)
 
@@ -185,26 +189,26 @@ def validation_run():
     wlb.log_info_on_root("Validation run")
     wlb.log_info_on_root("")
 
-    time_step_strategy = 'simpleOverlap'  # 'noOverlap'
+    time_step_strategy = "noOverlap"  # or "simpleOverlap"
 
     scenarios = wlb.ScenarioManager()
     scenario = Scenario(cells_per_block=(64, 64, 64),
                         time_step_strategy=time_step_strategy,
-                        timesteps=101,
+                        timesteps=201,
                         outer_iterations=1,
                         warmup_steps=0,
-                        init_shear_flow=True,
-                        boundary_setup=False,
-                        vtk_write_frequency=100,
+                        init_shear_flow=False,
+                        boundary_setup=True,
+                        vtk_write_frequency=50,
                         remaining_time_logger_frequency=10)
     scenarios.add(scenario)
 
 
 wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
 # Select the benchmark you want to run
-single_node_benchmark()  # benchmarks different CUDA block sizes and domain sizes and measures single GPU
+# single_node_benchmark()  # benchmarks different domain sizes and measures single-node CPU
 # performance of compute kernel (no communication)
 # overlap_benchmark()  # benchmarks different communication overlap options
 # profiling()  # run only two timesteps on a smaller domain for profiling only
-# validation_run()
+validation_run()
 # scaling_benchmark()
diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
index b1f74c57130935614f1e86c71d31a003afc27b7a..66a5b0fa4f4a3588f36ba4dbd5feb732131f76d0 100644
--- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
@@ -14,13 +14,12 @@ foreach(streaming_pattern pull push aa esotwist)
             waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
                     FILE UniformGridGPU.py
                     CODEGEN_CFG ${config}
-                    OUT_FILES   UniformGridGPU_LbKernel.${CODEGEN_FILE_SUFFIX} UniformGridGPU_LbKernel.h
-                    UniformGridGPU_PackInfoEven.${CODEGEN_FILE_SUFFIX} UniformGridGPU_PackInfoEven.h
-                    UniformGridGPU_PackInfoOdd.${CODEGEN_FILE_SUFFIX} UniformGridGPU_PackInfoOdd.h
-                    UniformGridGPU_NoSlip.${CODEGEN_FILE_SUFFIX} UniformGridGPU_NoSlip.h
-                    UniformGridGPU_UBB.${CODEGEN_FILE_SUFFIX} UniformGridGPU_UBB.h
-                    UniformGridGPU_MacroSetter.${CODEGEN_FILE_SUFFIX} UniformGridGPU_MacroSetter.h
-                    UniformGridGPU_StreamOnlyKernel.${CODEGEN_FILE_SUFFIX} UniformGridGPU_StreamOnlyKernel.h
+                    OUT_FILES UniformGridGPUStorageSpecification.h UniformGridGPUStorageSpecification.${CODEGEN_FILE_SUFFIX}
+                    UniformGridGPUSweepCollection.h UniformGridGPUSweepCollection.${CODEGEN_FILE_SUFFIX}
+                    NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX}
+                    UBB.h UBB.${CODEGEN_FILE_SUFFIX}
+                    UniformGridGPUBoundaryCollection.h
+                    UniformGridGPU_StreamOnlyKernel.h UniformGridGPU_StreamOnlyKernel.${CODEGEN_FILE_SUFFIX}
                     UniformGridGPU_InfoHeader.h
                     )
 
diff --git a/apps/benchmarks/UniformGridGPU/InitShearVelocity.h b/apps/benchmarks/UniformGridGPU/InitShearVelocity.h
index 9a6c7d1db63a5a7ad53376ed33f56851a806dafe..fd13a03b6d89ed30007969ecb2b77f27d015d8a6 100644
--- a/apps/benchmarks/UniformGridGPU/InitShearVelocity.h
+++ b/apps/benchmarks/UniformGridGPU/InitShearVelocity.h
@@ -16,7 +16,7 @@ inline void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks,
         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velField,
                                                          Cell globalCell;
         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
-        real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
+        const real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
         velField->get(x, y, z, 1) = real_t(0);
         velField->get(x, y, z, 2) = randomReal;
 
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index 7a3885d3b686d0967f7e7825ea109b8051393309..ee022f457738fb6f8aa71f615441e9279fd25eca 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -29,12 +29,24 @@
 
 #include "field/AddToStorage.h"
 #include "field/FlagField.h"
-#include "field/communication/PackInfo.h"
 #include "field/vtk/VTKWriter.h"
 
 #include "geometry/InitBoundaryHandling.h"
 
-#include "lbm/inplace_streaming/TimestepTracker.h"
+#include "gpu/AddGPUFieldToStorage.h"
+#include "gpu/DeviceSelectMPI.h"
+#include "gpu/FieldCopy.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/HostFieldAllocator.h"
+#include "gpu/ParallelStreams.h"
+#include "gpu/communication/UniformGPUScheme.h"
+
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/gpu/UniformGeneratedGPUPdfPackInfo.h"
+#include "lbm_generated/gpu/GPUPdfField.h"
+#include "lbm_generated/gpu/AddToStorage.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
 
 #include "python_coupling/CreateConfig.h"
 #include "python_coupling/DictWrapper.h"
@@ -46,16 +58,20 @@
 
 #include "InitShearVelocity.h"
 #include "UniformGridGPU_InfoHeader.h"
-#include "gpu/AddGPUFieldToStorage.h"
-#include "gpu/DeviceSelectMPI.h"
-#include "gpu/FieldCopy.h"
-#include "gpu/GPUWrapper.h"
-#include "gpu/ParallelStreams.h"
-#include "gpu/communication/UniformGPUScheme.h"
-#include "gpu/lbm/CombinedInPlaceGpuPackInfo.h"
+
 using namespace walberla;
 
+using StorageSpecification_T = lbm::UniformGridGPUStorageSpecification;
+using Stencil_T = lbm::UniformGridGPUStorageSpecification::Stencil;
+
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
+using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
 using FlagField_T = FlagField< uint8_t >;
+using BoundaryCollection_T = lbm::UniformGridGPUBoundaryCollection< FlagField_T >;
+
+using SweepCollection_T = lbm::UniformGridGPUSweepCollection;
+
+using gpu::communication::UniformGPUScheme;
 
 int main(int argc, char** argv)
 {
@@ -76,18 +92,21 @@ int main(int argc, char** argv)
       logging::configureLogging(config);
       auto blocks = blockforest::createUniformBlockGridFromConfig(config);
 
-      Vector3< uint_t > cellsPerBlock =
-         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
       // Reading parameters
       auto parameters          = config->getOneBlock("Parameters");
       const real_t omega       = parameters.getParameter< real_t >("omega", real_c(1.4));
       const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
       const bool initShearFlow = parameters.getParameter< bool >("initShearFlow", true);
+      const bool cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false);
 
       // Creating fields
-      BlockDataID const pdfFieldCpuID =
-         field::addToStorage< PdfField_T >(blocks, "pdfs cpu", real_c(std::nan("")), field::fzyx);
-      BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
+      const StorageSpecification_T StorageSpec = StorageSpecification_T();
+      const BlockDataID pdfFieldCpuID  = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx);
+
+      auto allocator = make_shared< gpu::HostFieldAllocator<real_t> >(); // use pinned memory allocator for faster CPU-GPU memory transfers
+      const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(1), allocator);
+      const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(1), allocator);
+      const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
 
       // Initialize velocity on cpu
       if (initShearFlow)
@@ -96,181 +115,92 @@ int main(int argc, char** argv)
          initShearVelocity(blocks, velFieldCpuID);
       }
 
-      BlockDataID const pdfFieldGpuID = gpu::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, "pdfs on GPU", true);
-      // Velocity field is copied to the GPU
-      BlockDataID velFieldGpuID =
+      const BlockDataID pdfFieldGpuID = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
+      const BlockDataID velFieldGpuID =
          gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
+      const BlockDataID densityFieldGpuID =
+         gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "density on GPU", true);
 
-      pystencils::UniformGridGPU_MacroSetter setterSweep(pdfFieldGpuID, velFieldGpuID);
-
-      // Set up initial PDF values
+      const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3<cell_idx_t> >("innerOuterSplit", Vector3<cell_idx_t>(1, 1, 1)));
+      Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
+      SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
       for (auto& block : *blocks)
-         setterSweep(&block);
-
-      Vector3< int > innerOuterSplit =
-         parameters.getParameter< Vector3< int > >("innerOuterSplit", Vector3< int >(1, 1, 1));
-
-      for (uint_t i = 0; i < 3; ++i)
       {
-         if (int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2)
-         { WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock") }
+         sweepCollection.initialise(&block);
       }
 
-      Cell const innerOuterSplitCell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]);
-      bool const cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false);
-      Vector3< int32_t > gpuBlockSize =
-         parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
-
       int streamHighPriority = 0;
       int streamLowPriority  = 0;
       WALBERLA_GPU_CHECK(gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))
-
+      sweepCollection.setOuterPriority(streamHighPriority);
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                      LB SWEEPS AND BOUNDARY HANDLING                                       ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-      using LbSweep      = lbm::UniformGridGPU_LbKernel;
-      using PackInfoEven = lbm::UniformGridGPU_PackInfoEven;
-      using PackInfoOdd  = lbm::UniformGridGPU_PackInfoOdd;
-      using gpu::communication::UniformGPUScheme;
-
-      LbSweep lbSweep(pdfFieldGpuID, omega, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], innerOuterSplitCell);
-      lbSweep.setOuterPriority(streamHighPriority);
-
-      pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID, gpuBlockSize[0], gpuBlockSize[1],
-                                                                   gpuBlockSize[2]);
+      const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
 
       // Boundaries
       const FlagUID fluidFlagUID("Fluid");
-      BlockDataID const flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
       auto boundariesConfig   = config->getBlock("Boundaries");
-      bool boundaries         = false;
       if (boundariesConfig)
       {
-         boundaries = true;
+         WALBERLA_LOG_INFO_ON_ROOT("Setting boundary conditions")
          geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
-         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
       }
-
-      lbm::UniformGridGPU_NoSlip noSlip(blocks, pdfFieldGpuID);
-      noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);
-
-      lbm::UniformGridGPU_UBB ubb(blocks, pdfFieldGpuID);
-      ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID);
-
-      // Initial setup is the post-collision state of an even time step
-      auto tracker = make_shared< lbm::TimestepTracker >(0);
+      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
+      BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                           COMMUNICATION SCHEME                                             ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-      UniformGPUScheme< Stencil_T > comm(blocks, cudaEnabledMPI);
-      auto packInfo =
-         make_shared< lbm::CombinedInPlaceGpuPackInfo< PackInfoEven, PackInfoOdd > >(tracker, pdfFieldGpuID);
-      comm.addPackInfo(packInfo);
+      UniformGPUScheme< Stencil_T > communication(blocks, cudaEnabledMPI);
+      auto packInfo = std::make_shared<lbm_generated::UniformGeneratedGPUPdfPackInfo< GPUPdfField_T >>(pdfFieldGpuID);
+      communication.addPackInfo(packInfo);
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                          TIME STEP DEFINITIONS                                             ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
+      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
 
       auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
 
-      auto boundarySweep = [&](IBlock* block, uint8_t t, gpuStream_t stream) {
-         noSlip.run(block, t, stream);
-         ubb.run(block, t, stream);
-      };
-
-      auto boundaryInner = [&](IBlock* block, uint8_t t, gpuStream_t stream) {
-         noSlip.inner(block, t, stream);
-         ubb.inner(block, t, stream);
-      };
-
-      auto boundaryOuter = [&](IBlock* block, uint8_t t, gpuStream_t stream) {
-         noSlip.outer(block, t, stream);
-         ubb.outer(block, t, stream);
-      };
-
-      auto simpleOverlapTimeStep = [&]() {
-         // Communicate post-collision values of previous timestep...
-         comm.startCommunication(defaultStream);
-         for (auto& block : *blocks)
-         {
-            if (boundaries) boundaryInner(&block, tracker->getCounter(), defaultStream);
-            lbSweep.inner(&block, tracker->getCounterPlusOne(), defaultStream);
-         }
-         comm.wait(defaultStream);
-         for (auto& block : *blocks)
-         {
-            if (boundaries) boundaryOuter(&block, tracker->getCounter(), defaultStream);
-            lbSweep.outer(&block, tracker->getCounterPlusOne(), defaultStream);
-         }
-
-         tracker->advance();
-      };
-
-      auto normalTimeStep = [&]() {
-         comm.communicate(defaultStream);
-         for (auto& block : *blocks)
-         {
-            if (boundaries) boundarySweep(&block, tracker->getCounter(), defaultStream);
-            lbSweep(&block, tracker->getCounterPlusOne(), defaultStream);
-         }
-
-         tracker->advance();
-      };
-
-      // With two-fields patterns, ghost layer cells act as constant stream-in boundaries;
-      // with in-place patterns, ghost layer cells act as wet-node no-slip boundaries.
-      auto kernelOnlyFunc = [&]() {
-         tracker->advance();
-         for (auto& block : *blocks)
-            lbSweep(&block, tracker->getCounter(), defaultStream);
-      };
-
-      // Stream only function to test a streaming pattern without executing lbm operations inside
-      auto StreamOnlyFunc = [&]() {
-         for (auto& block : *blocks)
-            StreamOnlyKernel(&block, defaultStream);
-      };
+      if (timeStepStrategy == "noOverlap") {
+         if (boundariesConfig){
+            timeLoop.add() << BeforeFunction(communication.getCommunicateFunctor(defaultStream), "communication")
+                           << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL, defaultStream), "Boundary Conditions");
+            timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL, defaultStream), "LBM StreamCollide");
+         }else {
+            timeLoop.add() << BeforeFunction(communication.getCommunicateFunctor(defaultStream), "communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL, defaultStream), "LBM StreamCollide");}
+
+      } else if (timeStepStrategy == "simpleOverlap") {
+         if (boundariesConfig){
+            timeLoop.add() << BeforeFunction(communication.getStartCommunicateFunctor(defaultStream), "Start Communication")
+                           << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL, defaultStream), "Boundary Conditions");
+            timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::INNER, defaultStream), "LBM StreamCollide Inner Frame");
+            timeLoop.add() << BeforeFunction(communication.getWaitFunctor(), "Wait for Communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::OUTER, defaultStream), "LBM StreamCollide Outer Frame");
+         }else{
+            timeLoop.add() << BeforeFunction(communication.getStartCommunicateFunctor(defaultStream), "Start Communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::INNER, defaultStream), "LBM StreamCollide Inner Frame");
+            timeLoop.add() << BeforeFunction(communication.getWaitFunctor(), "Wait for Communication")
+                           << Sweep(sweepCollection.streamCollide(SweepCollection_T::OUTER,defaultStream), "LBM StreamCollide Outer Frame");}
+
+      } else if (timeStepStrategy == "kernelOnly") {
+         timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL, defaultStream), "LBM StreamCollide");
+      } else if (timeStepStrategy == "StreamOnly") {
+         timeLoop.add() << Sweep(StreamOnlyKernel, "LBM Stream Only");
+      } else {
+         WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', 'simpleOverlap', 'kernelOnly', 'StreamOnly'")
+      }
 
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ///                                             TIME LOOP SETUP                                                ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
-
-      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
-      std::function< void() > timeStep;
-      if (timeStepStrategy == "noOverlap")
-         timeStep = std::function< void() >(normalTimeStep);
-      else if (timeStepStrategy == "simpleOverlap")
-         timeStep = simpleOverlapTimeStep;
-      else if (timeStepStrategy == "kernelOnly")
-      {
-         WALBERLA_LOG_INFO_ON_ROOT(
-            "Running only compute kernel without boundary - this makes only sense for benchmarking!")
-         // Run initial communication once to provide any missing stream-in populations
-         comm.communicate();
-         timeStep = kernelOnlyFunc;
-      }
-      else if (timeStepStrategy == "StreamOnly")
-      {
-         WALBERLA_LOG_INFO_ON_ROOT(
-            "Running only streaming kernel without LBM - this makes only sense for benchmarking!")
-         // Run initial communication once to provide any missing stream-in populations
-         timeStep = StreamOnlyFunc;
-      }
-      else
-      {
-         WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', "
-                                      "'simpleOverlap', 'kernelOnly'")
-      }
-
-      timeLoop.add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
-
       // VTK
-      uint_t const vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
       if (vtkWriteFrequency > 0)
       {
          auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
@@ -278,7 +208,10 @@ int main(int argc, char** argv)
          auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldCpuID, "vel");
          vtkOutput->addCellDataWriter(velWriter);
 
-         vtkOutput->addBeforeFunction([&]() { gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
+         vtkOutput->addBeforeFunction([&]() {
+            for (auto& block : *blocks)
+               sweepCollection.calculateMacroscopicParameters(&block);
+            gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
          });
          timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
       }
@@ -287,12 +220,13 @@ int main(int argc, char** argv)
       ///                                               BENCHMARK                                                    ///
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-      int const warmupSteps     = parameters.getParameter< int >("warmupSteps", 2);
-      int const outerIterations = parameters.getParameter< int >("outerIterations", 1);
-      for (int i = 0; i < warmupSteps; ++i)
+      lbm_generated::PerformanceEvaluation<FlagField_T> const performance(blocks, flagFieldID, fluidFlagUID);
+      const uint_t warmupSteps     = parameters.getParameter< uint_t >("warmupSteps", uint_c(2));
+      const uint_t outerIterations = parameters.getParameter< uint_t >("outerIterations", uint_c(1));
+      for (uint_t i = 0; i < warmupSteps; ++i)
          timeLoop.singleStep();
 
-      real_t const remainingTimeLoggerFrequency =
+      auto remainingTimeLoggerFrequency =
          parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
       if (remainingTimeLoggerFrequency > 0)
       {
@@ -301,32 +235,36 @@ int main(int argc, char** argv)
          timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
       }
 
-      for (int outerIteration = 0; outerIteration < outerIterations; ++outerIteration)
+      for (uint_t outerIteration = 0; outerIteration < outerIterations; ++outerIteration)
       {
          WALBERLA_GPU_CHECK(gpuPeekAtLastError())
 
          timeLoop.setCurrentTimeStepToZero();
+         WcTimingPool const timeloopTiming;
          WcTimer simTimer;
+
          WALBERLA_GPU_CHECK( gpuDeviceSynchronize() )
-         WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+         WALBERLA_GPU_CHECK( gpuPeekAtLastError() )
          WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
          simTimer.start();
          timeLoop.run();
          WALBERLA_GPU_CHECK( gpuDeviceSynchronize() )
          simTimer.end();
+
          WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
-         auto time      = real_c(simTimer.last());
-         auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+         double time = simTimer.max();
+         WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+         performance.logResultOnRoot(timesteps, time);
+
+         const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+         WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
 
-         auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
-         WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
-         WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
          WALBERLA_ROOT_SECTION()
          {
             python_coupling::PythonCallback pythonCallbackResults("results_callback");
             if (pythonCallbackResults.isCallable())
             {
-               pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
+               pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
                pythonCallbackResults.data().exposeValue("stencil", infoStencil);
                pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
                pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
@@ -338,6 +276,5 @@ int main(int argc, char** argv)
          }
       }
    }
-
    return EXIT_SUCCESS;
 }
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
index e8fa9906aa1ae005af20f0f77178fb054a528161..3d7579e5bcb3f3713f59a9afd94d7fed790c21e9 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -8,22 +8,21 @@ from pystencils.typing import TypedSymbol
 from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
 
 from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
-from lbmpy.advanced_streaming import Timestep, is_inplace
+from lbmpy.advanced_streaming import is_inplace
 from lbmpy.advanced_streaming.utility import streaming_patterns
 from lbmpy.boundaries import NoSlip, UBB
 from lbmpy.creationfunctions import create_lb_collision_rule
-from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
 from lbmpy.moments import get_default_moment_set_for_stencil
 from lbmpy.updatekernels import create_stream_only_kernel
 from lbmpy.fieldaccess import *
 
 from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
-from lbmpy_walberla import generate_alternating_lbm_sweep, generate_lb_pack_info, generate_alternating_lbm_boundary
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
 
 omega = sp.symbols("omega")
 omega_free = sp.Symbol("omega_free")
 compile_time_block_size = False
-max_threads = None
+max_threads = 256
 
 if compile_time_block_size:
     sweep_block_size = (128, 1, 1)
@@ -124,11 +123,10 @@ with CodeGeneration() as ctx:
 
     options = options_dict[collision_setup]
 
-    q = stencil.Q
-    dim = stencil.D
-    assert dim == 3, "This app supports only three-dimensional stencils"
-    pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : {field_type}[3D]",
-                                               layout='fzyx')
+    assert stencil.D == 3, "This application supports only three-dimensional stencils"
+    pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
+    density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
+    macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
 
     lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
     lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
@@ -142,12 +140,6 @@ with CodeGeneration() as ctx:
     else:
         field_swaps = []
 
-    vp = [
-        ('int32_t', 'cudaBlockSize0'),
-        ('int32_t', 'cudaBlockSize1'),
-        ('int32_t', 'cudaBlockSize2')
-    ]
-
     # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
     # is_inplace is set to False to ensure that the streaming is done with src and dst field.
     # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
@@ -165,38 +157,25 @@ with CodeGeneration() as ctx:
 
     lb_method = collision_rule.method
 
-    generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, lbm_config=lbm_config,
-                                   lbm_optimisation=lbm_opt, target=ps.Target.GPU,
-                                   gpu_indexing_params=gpu_indexing_params,
-                                   inner_outer_split=True, varying_parameters=vp, field_swaps=field_swaps,
-                                   max_threads=max_threads)
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip())
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB([0.05, 0, 0], data_type=field_type))
 
-    # getter & setter
-    setter_assignments = macroscopic_values_setter(lb_method, density=1.0, velocity=velocity_field.center_vector,
-                                                   pdfs=pdfs,
-                                                   streaming_pattern=streaming_pattern,
-                                                   previous_timestep=Timestep.EVEN)
-    generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target=ps.Target.GPU, max_threads=max_threads)
+    generate_lbm_package(ctx, name="UniformGridGPU",
+                         collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=False, boundaries=[no_slip, ubb],
+                         macroscopic_fields=macroscopic_fields,
+                         target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
+                         max_threads=max_threads)
 
     # Stream only kernel
+    vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')]
     generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
                    gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target=ps.Target.GPU,
                    max_threads=max_threads)
 
-    # Boundaries
-    noslip = NoSlip()
-    ubb = UBB((0.05, 0, 0), data_type=field_type)
-
-    generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
-                                      streaming_pattern=streaming_pattern, target=ps.Target.GPU)
-    generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_UBB', ubb, lb_method, field_name=pdfs.name,
-                                      streaming_pattern=streaming_pattern, target=ps.Target.GPU)
-
-    # communication
-    generate_lb_pack_info(ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
-                          streaming_pattern=streaming_pattern, target=ps.Target.GPU,
-                          always_generate_separate_classes=True)
-
     infoHeaderParams = {
         'stencil': stencil_str,
         'streaming_pattern': streaming_pattern,
@@ -205,12 +184,10 @@ with CodeGeneration() as ctx:
         'cse_pdfs': int(lbm_opt.cse_pdfs),
     }
 
-    stencil_typedefs = {'Stencil_T': stencil,
-                        'CommunicationStencil_T': stencil}
-    field_typedefs = {'PdfField_T': pdfs,
-                      'VelocityField_T': velocity_field}
+    field_typedefs = {'VelocityField_T': velocity_field,
+                      'ScalarField_T': density_field}
 
     # Info header containing correct template definitions for stencil and field
     generate_info_header(ctx, 'UniformGridGPU_InfoHeader',
-                         stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
+                         field_typedefs=field_typedefs,
                          additional_code=info_header.format(**infoHeaderParams))
diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
index 8de01dacf51ed5e94ac651a5ca61f50988bd3416..531ab22d54ab261ad8f159c91e85c5bfde03360d 100755
--- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
@@ -1,12 +1,3 @@
-#!/usr/bin/env python3
-"""
-This is a waLBerla parameter file that tests (almost) all parameter combinations for GPU communication.
-Build waLBerla with -DWALBERLA_BUILD_WITH_PYTHON=1  then run e.g.
- ./UniformGridGPU_d3q27_aa_srt simulation_setup/benchmark_configs.py
-
-Look at the end of the file to select the benchmark to run
-"""
-
 import os
 import waLBerla as wlb
 from waLBerla.tools.config import block_decomposition
@@ -34,6 +25,15 @@ BASE_CONFIG = {
     }
 }
 
+ldc_setup = {'Border': [
+    {'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
+    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+    {'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
+    {'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
+    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+]}
+
 
 def num_time_steps(block_size, time_steps_for_128_block=200):
     cells = block_size[0] * block_size[1] * block_size[2]
@@ -57,10 +57,16 @@ def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
 class Scenario:
     def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blocks=(256, 1, 1),
                  timesteps=None, time_step_strategy="normal", omega=1.8, cuda_enabled_mpi=False,
-                 inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3, init_shear_flow=False,
+                 inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
+                 init_shear_flow=False, boundary_setup=False,
+                 vtk_write_frequency=0, remaining_time_logger_frequency=-1,
                  additional_info=None):
 
-        self.blocks = block_decomposition(wlb.mpi.numProcesses())
+        if boundary_setup:
+            init_shear_flow = False
+            periodic = (0, 0, 0)
+
+        self.blocks = block_decomposition(wlb.mpi.numProcesses())
 
         self.cells_per_block = cells_per_block
         self.periodic = periodic
@@ -71,11 +77,13 @@ class Scenario:
         self.cuda_enabled_mpi = cuda_enabled_mpi
         self.inner_outer_split = inner_outer_split
         self.init_shear_flow = init_shear_flow
+        self.boundary_setup = boundary_setup
         self.warmup_steps = warmup_steps
         self.outer_iterations = outer_iterations
         self.cuda_blocks = cuda_blocks
 
-        self.vtk_write_frequency = 0
+        self.vtk_write_frequency = vtk_write_frequency
+        self.remaining_time_logger_frequency = remaining_time_logger_frequency
 
         self.config_dict = self.config(print_dict=False)
         self.additional_info = additional_info
@@ -88,6 +96,7 @@ class Scenario:
                 'blocks': self.blocks,
                 'cellsPerBlock': self.cells_per_block,
                 'periodic': self.periodic,
+                'oneBlockPerProcess': False
             },
             'Parameters': {
                 'omega': self.omega,
@@ -99,9 +108,13 @@ class Scenario:
                 'initShearFlow': self.init_shear_flow,
                 'gpuBlockSize': self.cuda_blocks,
                 'innerOuterSplit': self.inner_outer_split,
-                'vtkWriteFrequency': self.vtk_write_frequency
+                'vtkWriteFrequency': self.vtk_write_frequency,
+                'remainingTimeLoggerFrequency': self.remaining_time_logger_frequency
             }
         }
+        if self.boundary_setup:
+            config_dict["Boundaries"] = ldc_setup
+
         if print_dict:
             wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
             if self.additional_info:
@@ -219,90 +232,30 @@ def single_gpu_benchmark():
             scenarios.add(scenario)
 
 
-# -------------------------------------- Optional job script generation for PizDaint ---------------------------------
-
-
-job_script_header = """
-#!/bin/bash -l
-#SBATCH --job-name=scaling
-#SBATCH --time=01:00:00
-#SBATCH --nodes={nodes}
-#SBATCH -o out_scaling_{nodes}_%j.txt
-#SBATCH -e err_scaling_{nodes}_%j.txt
-#SBATCH --ntasks-per-core=1
-#SBATCH --cpus-per-task=1
-#SBATCH --partition=normal
-#SBATCH --constraint=gpu
-#SBATCH --account=s1042
-
-source ~/env.sh
-
-export MPICH_RDMA_ENABLED_CUDA=1  # allow GPU-GPU data transfer
-export CRAY_CUDA_MPS=1            # allow GPU sharing
-export MPICH_G2G_PIPELINE=256     # adapt maximum number of concurrent in-flight messages
-
-export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
-export CRAY_CUDA_MPS=1
-
-export MPICH_RANK_REORDER_METHOD=3
-export PMI_MMAP_SYNC_WAIT_TIME=300
-
-cd {folder}
-# grid_order -R -H -c 1,1,8 -g 16,16,8
-
-ulimit -c 0
-"""
-
-job_script_exe_part = """
-
-export WALBERLA_SCENARIO_IDX=0
-while srun -n {nodes} ./{app} {config}
-do
- ((WALBERLA_SCENARIO_IDX++))
-done
-"""
-
-streaming_patterns = ['pull', 'push', 'aa', 'esotwist']
-stencils = ['d3q27', 'd3q19']
-methods = ['srt', 'mrt', 'cumulant', 'entropic']
-
-all_executables = []
-
-for stencil in stencils:
-    for streaming_pattern in streaming_patterns:
-        for method in methods:
-            all_executables.append(f"UniformGridGPU_{stencil}_{streaming_pattern}_{method}")
-
-all_executables = tuple(all_executables)
-
-
-def generate_jobscripts(exe_names=all_executables):
-    for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]:
-        folder_name = "scaling_{:04d}".format(node_count)
-        os.makedirs(folder_name, exist_ok=True)
-
-        # run grid_order
-        import subprocess
-        decomposition = block_decomposition(node_count)
-        decomposition_str = ",".join(str(e) for e in decomposition)
-        subprocess.check_call(['grid_order', '-R', '-H', '-g', decomposition_str])
-
-        job_script = job_script_header.format(nodes=node_count, folder=os.path.join(os.getcwd(), folder_name))
-        for exe in exe_names:
-            job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count,
-                                                     config='../communication_compare.py')
-
-        with open(os.path.join(folder_name, 'job.sh'), 'w') as f:
-            f.write(job_script)
+def validation_run():
+    """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
+    wlb.log_info_on_root("Validation run")
+    wlb.log_info_on_root("")
 
+    time_step_strategy = "noOverlap"
 
-if __name__ == '__main__':
-    print("Called without waLBerla - generating job scripts for PizDaint")
-    generate_jobscripts()
-else:
-    wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
-    # Select the benchmark you want to run
-    single_gpu_benchmark()  # benchmarks different CUDA block sizes and domain sizes and measures single GPU
-    # performance of compute kernel (no communication)
-    # overlap_benchmark()  # benchmarks different communication overlap options
-    # profiling()  # run only two timesteps on a smaller domain for profiling only
+    scenarios = wlb.ScenarioManager()
+    scenario = Scenario(cells_per_block=(64, 64, 64),
+                        time_step_strategy=time_step_strategy,
+                        timesteps=1000,
+                        outer_iterations=1,
+                        warmup_steps=0,
+                        init_shear_flow=False,
+                        boundary_setup=True,
+                        vtk_write_frequency=0,
+                        remaining_time_logger_frequency=10)
+    scenarios.add(scenario)
+
+
+wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
+# Select the benchmark you want to run
+# single_gpu_benchmark()  # benchmarks different CUDA block sizes and domain sizes and measures single GPU
+# performance of compute kernel (no communication)
+# overlap_benchmark()  # benchmarks different communication overlap options
+# profiling()  # run only two timesteps on a smaller domain for profiling only
+validation_run()
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
index 95b852203ce277cf293011694e9f39e8063417c9..61e4464d18c4ea1a5ee056f26792c60f6af71250 100644
--- a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
@@ -4,16 +4,16 @@ waLBerla_link_files_to_builddir(*.obj)
 
 waLBerla_generate_target_from_python(NAME PhaseFieldCodeGenGPU
         FILE multiphase_codegen.py
-        OUT_FILES initialize_phase_field_distributions.cu initialize_phase_field_distributions.h
-        initialize_velocity_based_distributions.cu initialize_velocity_based_distributions.h
-        phase_field_LB_step.cu phase_field_LB_step.h
-        phase_field_LB_NoSlip.cu phase_field_LB_NoSlip.h
-        hydro_LB_step.cu hydro_LB_step.h
-        hydro_LB_NoSlip.cu hydro_LB_NoSlip.h
-        PackInfo_phase_field_distributions.cu PackInfo_phase_field_distributions.h
-        PackInfo_phase_field.cu PackInfo_phase_field.h
-        PackInfo_velocity_based_distributions.cu PackInfo_velocity_based_distributions.h
-        ContactAngle.cu ContactAngle.h
+        OUT_FILES initialize_phase_field_distributions.${CODEGEN_FILE_SUFFIX} initialize_phase_field_distributions.h
+        initialize_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} initialize_velocity_based_distributions.h
+        phase_field_LB_step.${CODEGEN_FILE_SUFFIX} phase_field_LB_step.h
+        phase_field_LB_NoSlip.${CODEGEN_FILE_SUFFIX} phase_field_LB_NoSlip.h
+        hydro_LB_step.${CODEGEN_FILE_SUFFIX} hydro_LB_step.h
+        hydro_LB_NoSlip.${CODEGEN_FILE_SUFFIX} hydro_LB_NoSlip.h
+        PackInfo_phase_field_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field_distributions.h
+        PackInfo_phase_field.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field.h
+        PackInfo_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_velocity_based_distributions.h
+        ContactAngle.${CODEGEN_FILE_SUFFIX} ContactAngle.h
         GenDefines.h)
 
 waLBerla_add_executable(NAME multiphaseGPU
diff --git a/apps/tutorials/gpu/01_GameOfLife_cuda.cpp b/apps/tutorials/gpu/01_GameOfLife_cuda.cpp
index e73ec19962dff4cb89f523e83f6466fb685b1c69..2cfc8b30b94e1bad57508d23f5a672de7ccc8df5 100644
--- a/apps/tutorials/gpu/01_GameOfLife_cuda.cpp
+++ b/apps/tutorials/gpu/01_GameOfLife_cuda.cpp
@@ -30,6 +30,7 @@
 #include "gpu/communication/MemcpyPackInfo.h"
 #include "gpu/communication/UniformGPUScheme.h"
 
+#include "field/AddToStorage.h"
 #include "field/vtk/VTKWriter.h"
 
 #include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
@@ -48,21 +49,6 @@ using CommScheme = gpu::communication::UniformGPUScheme<stencil::D2Q9 > ;
 using Packing = gpu::communication::MemcpyPackInfo<GPUField> ;
 
 
-ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
-{
-   auto xSize = storage->getNumberOfXCells( *block );
-   auto ySize = storage->getNumberOfYCells( *block );
-   auto zSize = storage->getNumberOfZCells( *block );
-   auto numberOfGhostLayers = uint_c(1);
-   auto initialValue = real_c(0);
-   auto fieldLayout = field::fzyx;
-   return new ScalarField (xSize, ySize, zSize,
-                          numberOfGhostLayers, initialValue, fieldLayout,
-                          make_shared< gpu::HostFieldAllocator<real_t> >()  // allocator for host pinned memory
-                           );
-}
-
-
 int main( int argc, char ** argv )
 {
    walberla::Environment const env( argc, argv );
@@ -78,7 +64,8 @@ int main( int argc, char ** argv )
             false, false, false );                                                   // no periodicity
 
 
-   BlockDataID const cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
+   auto hostFieldAllocator = make_shared< gpu::HostFieldAllocator<real_t> >();
+   BlockDataID const cpuFieldID = field::addToStorage< ScalarField >(blocks, "CPU Field", real_c(0.0), field::fzyx, uint_c(1), hostFieldAllocator);
 
    // Initializing the field from an image
    using geometry::initializer::ScalarFieldFromGrayScaleImage;
diff --git a/apps/tutorials/gpu/01_GameOfLife_cuda.dox b/apps/tutorials/gpu/01_GameOfLife_cuda.dox
index 77c83e5f66df3f2cf6ab612099dd62c1bdbfcb69..8794e6c520ffb31d2c3653622cb2f4b4ba4b6eda 100644
--- a/apps/tutorials/gpu/01_GameOfLife_cuda.dox
+++ b/apps/tutorials/gpu/01_GameOfLife_cuda.dox
@@ -33,18 +33,8 @@ Data transfer from pinned memory is faster than from normal memory. The usage of
 mandatory, the data transfer functions work (slightly slower) also with normally allocated fields.
 
 \code
-ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage )
-{
-   return new ScalarField (
-            storage->getNumberOfXCells( *block ),   // number of cells in x direction per block
-            storage->getNumberOfYCells( *block ),   // number of cells in y direction per block
-            storage->getNumberOfZCells( *block ),   // number of cells in z direction per block
-            1,                                      // one ghost layer
-            real_t(0),                              // initial value
-            field::fzyx,                            // layout
-            make_shared<gpu::HostFieldAllocator<double> >()  // allocator for host pinned memory
-            );
-}
+auto hostFieldAllocator = make_shared< gpu::HostFieldAllocator<real_t> >();
+BlockDataID const cpuFieldID = field::addToStorage< ScalarField >(blocks, "CPU Field", real_c(0.0), field::fzyx, uint_c(1), hostFieldAllocator);
 \endcode
 
 Now we initialize the CPU field just like in the previous tutorial \ref tutorial_basics03 .
diff --git a/python/lbmpy_walberla/__init__.py b/python/lbmpy_walberla/__init__.py
index 15de37c8112e16a21476ba3e388adc843af92956..deb96e02ed3e5e5acfff016b7f185676788b7a76 100644
--- a/python/lbmpy_walberla/__init__.py
+++ b/python/lbmpy_walberla/__init__.py
@@ -1,8 +1,16 @@
 from .boundary import generate_boundary, generate_alternating_lbm_boundary
+from .boundary_collection import lbm_boundary_generator, generate_boundary_collection
 from .walberla_lbm_generation import RefinementScaling, generate_lattice_model
+from .storage_specification import generate_lbm_storage_specification
+from .sweep_collection import generate_lbm_sweep_collection
 from .packinfo import generate_lb_pack_info
+from .packing_kernels import generate_packing_kernels
 from .alternating_sweeps import generate_alternating_lbm_sweep
+from .walberla_lbm_package import generate_lbm_package
 
 __all__ = ['generate_lattice_model', 'generate_alternating_lbm_sweep',
-           'RefinementScaling', 'generate_boundary', 'generate_alternating_lbm_boundary',
-           'generate_lb_pack_info']
+           'generate_lbm_storage_specification', 'generate_lbm_sweep_collection',
+           'RefinementScaling', 'lbm_boundary_generator', 'generate_boundary_collection', 'generate_boundary',
+           'generate_alternating_lbm_boundary',
+           'generate_lb_pack_info', 'generate_packing_kernels',
+           'generate_lbm_package']
diff --git a/python/lbmpy_walberla/alternating_sweeps.py b/python/lbmpy_walberla/alternating_sweeps.py
index dbcc1ab54e618101658a2c2262dac946f9d99805..444a2000adb65c3ad66bfc028f7bedcab4e60896 100644
--- a/python/lbmpy_walberla/alternating_sweeps.py
+++ b/python/lbmpy_walberla/alternating_sweeps.py
@@ -1,14 +1,17 @@
 from dataclasses import replace
+from typing import Set
 
 import numpy as np
 
-from pystencils_walberla.codegen import generate_selective_sweep, config_from_context
-from pystencils_walberla.kernel_selection import (
-    AbstractInterfaceArgumentMapping, AbstractConditionNode, KernelCallNode)
 from pystencils import Target, TypedSymbol
 from lbmpy.creationfunctions import create_lb_ast
 from lbmpy.advanced_streaming import Timestep, is_inplace
 
+from pystencils_walberla.sweep import generate_selective_sweep
+from pystencils_walberla.kernel_selection import (
+    AbstractInterfaceArgumentMapping, AbstractConditionNode, KernelCallNode)
+from pystencils_walberla.utility import config_from_context
+
 
 class EvenIntegerCondition(AbstractConditionNode):
     def __init__(self, parameter_name: str,
@@ -54,7 +57,7 @@ class TimestepTrackerMapping(AbstractInterfaceArgumentMapping):
         return f"{self.tracker_symbol.name}->getCounter()"
 
     @property
-    def headers(self):
+    def headers(self) -> Set:
         return {'"lbm/inplace_streaming/TimestepTracker.h"'}
 
 
diff --git a/python/lbmpy_walberla/boundary_collection.py b/python/lbmpy_walberla/boundary_collection.py
new file mode 100644
index 0000000000000000000000000000000000000000..17bfa245a3212404c35dd06c420fcb19a55c3049
--- /dev/null
+++ b/python/lbmpy_walberla/boundary_collection.py
@@ -0,0 +1,147 @@
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
+import pystencils_walberla.boundary
+from lbmpy.boundaries.boundaryconditions import LbBoundary
+from lbmpy.boundaries.boundaryhandling import create_lattice_boltzmann_boundary_kernel
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from lbmpy.advanced_streaming import Timestep, is_inplace
+
+from pystencils_walberla.kernel_selection import KernelCallNode
+from lbmpy_walberla.alternating_sweeps import EvenIntegerCondition, OddIntegerCondition, TimestepTrackerMapping
+from lbmpy_walberla.additional_data_handler import default_additional_data_handler
+
+from pystencils import Target
+
+import numpy as np
+
+
+def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBoundary, additional_data_handler=None):
+    def generation_function(ctx, lb_method, field_name='pdfs',
+                            streaming_pattern='pull', after_collision=True,
+                            namespace='lbm',
+                            **create_kernel_params):
+        context = __generate_alternating_lbm_boundary(generation_context=ctx,
+                                                      class_name=class_name,
+                                                      boundary_object=boundary_object,
+                                                      lb_method=lb_method,
+                                                      field_name=field_name,
+                                                      streaming_pattern=streaming_pattern,
+                                                      after_collision=after_collision,
+                                                      additional_data_handler=additional_data_handler,
+                                                      namespace=namespace,
+                                                      **create_kernel_params)
+
+        return context
+
+    return {'flag_id': flag_uid, 'generator': generation_function}
+
+
+def generate_boundary_collection(generation_context,
+                                 class_name,
+                                 boundary_generators,
+                                 lb_method,
+                                 field_name='pdfs',
+                                 streaming_pattern='pull',
+                                 prev_timestep=Timestep.BOTH,
+                                 namespace='lbm',
+                                 **create_kernel_params):
+
+    kernel_list = []
+    includes = []
+    boundary_classes = []
+    flag_uids = []
+    object_names = []
+    targets = []
+
+    for boundary_generator in boundary_generators:
+        boundary_functor = boundary_generator['generator']
+        context = boundary_functor(generation_context, lb_method, field_name, streaming_pattern, prev_timestep,
+                                   namespace, **create_kernel_params)
+
+        kernel_list.append(context['kernel'])
+        includes.append(f"\"{context['class_name']}.h\"")
+        boundary_classes.append(f"{context['namespace']}::{context['class_name']}")
+        flag_uids.append(boundary_generator['flag_id'])
+        object_names.append(f"{context['class_name']}Object")
+        targets.append(f"{context['target']}")
+
+    assert len(set(targets)) == 1
+    target = targets[0]
+
+    jinja_context = {
+        'kernel_list': kernel_list,
+        'class_name': class_name,
+        'target': target,
+        'namespace': namespace,
+        'includes': includes,
+        'boundary_classes': boundary_classes,
+        'flag_uids': flag_uids,
+        'object_names': object_names
+    }
+
+    env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined)
+    env.globals.update(zip=zip)
+    add_pystencils_filters_to_jinja_env(env)
+
+    header = env.get_template("BoundaryCollection.tmpl.h").render(**jinja_context)
+
+    generation_context.write_file(f"{class_name}.h", header)
+
+
+# Internal
+def __generate_alternating_lbm_boundary(generation_context,
+                                        class_name,
+                                        boundary_object,
+                                        lb_method,
+                                        field_name='pdfs',
+                                        streaming_pattern='pull',
+                                        after_collision=True,
+                                        additional_data_handler=None,
+                                        namespace='lbm',
+                                        **create_kernel_params):
+    if boundary_object.additional_data and additional_data_handler is None:
+        target = create_kernel_params.get('target', Target.CPU)
+        additional_data_handler = default_additional_data_handler(boundary_object, lb_method, field_name, target=target)
+
+    timestep_param_name = 'timestep'
+    timestep_param_dtype = np.uint8
+
+    def boundary_creation_function(field, index_field, stencil, boundary_functor, target=Target.CPU, **kwargs):
+        pargs = (field, index_field, lb_method, boundary_functor)
+        kwargs = {'target': target, **kwargs}
+        ast_even = create_lattice_boltzmann_boundary_kernel(*pargs,
+                                                            streaming_pattern=streaming_pattern,
+                                                            prev_timestep=Timestep.EVEN,
+                                                            **kwargs)
+        ast_even.function_name = 'even'
+        kernel_even = KernelCallNode(ast_even)
+
+        if is_inplace(streaming_pattern):
+            ast_odd = create_lattice_boltzmann_boundary_kernel(*pargs,
+                                                               streaming_pattern=streaming_pattern,
+                                                               prev_timestep=Timestep.ODD,
+                                                               **kwargs)
+            ast_odd.function_name = 'odd'
+            kernel_odd = KernelCallNode(ast_odd)
+        else:
+            kernel_odd = kernel_even
+
+        if after_collision:
+            return EvenIntegerCondition(timestep_param_name, kernel_even, kernel_odd, timestep_param_dtype)
+        else:
+            return OddIntegerCondition(timestep_param_name, kernel_even, kernel_odd, timestep_param_dtype)
+
+    timestep_advancement = {"field_name": field_name, "function": "getTimestep"}
+
+    context = pystencils_walberla.boundary.generate_boundary(generation_context,
+                                                             class_name,
+                                                             boundary_object,
+                                                             field_name=field_name,
+                                                             neighbor_stencil=lb_method.stencil,
+                                                             index_shape=[lb_method.stencil.Q],
+                                                             kernel_creation_function=boundary_creation_function,
+                                                             namespace=namespace,
+                                                             additional_data_handler=additional_data_handler,
+                                                             field_timestep=timestep_advancement,
+                                                             **create_kernel_params)
+    return context
diff --git a/python/lbmpy_walberla/function_generator.py b/python/lbmpy_walberla/function_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e3d552c2daa42bb12223150b65a7de0a59f8b3a
--- /dev/null
+++ b/python/lbmpy_walberla/function_generator.py
@@ -0,0 +1,26 @@
+from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily, HighLevelInterfaceSpec
+
+
+def kernel_family_function_generator(class_name: str, kernel_family: KernelFamily,
+                                     namespace: str = 'lbm', max_threads: int = None):
+    """Returns a zero-argument callable producing the jinja context for ``kernel_family``.
+
+    The context construction in ``__function_generator`` is deferred until the
+    returned callable is invoked, so it only runs when the code generator
+    actually renders the free function named ``class_name``.
+    """
+
+    return lambda: __function_generator(class_name, kernel_family, namespace, max_threads)
+
+
+def __function_generator(class_name: str, kernel_family: KernelFamily,
+                         namespace: str = 'lbm', max_threads: int = None):
+    """Builds the jinja template context for rendering ``kernel_family`` as a function.
+
+    ``field`` is a single representative field name used by the template; it is
+    chosen as the alphabetically first field name among the family's field
+    parameters to make the selection deterministic.
+    """
+
+    representative_field = {p.field_name for p in kernel_family.parameters if p.is_field_parameter}
+    representative_field = sorted(representative_field)[0]
+
+    # No high-level interface parameters besides the family's own selection parameters.
+    interface_spec = HighLevelInterfaceSpec(kernel_family.kernel_selection_parameters, ())
+
+    jinja_context = {
+        'kernel': kernel_family,
+        'namespace': namespace,
+        'function_name': class_name,
+        'field': representative_field,
+        'interface_spec': interface_spec,
+        'max_threads': max_threads
+    }
+    return jinja_context
diff --git a/python/lbmpy_walberla/packinfo.py b/python/lbmpy_walberla/packinfo.py
index 796ccfd9832b082610d5dc8b1065ddfa8450ca36..b53ef743f03c0f4c75128f0cce3d3fbaffbff593 100644
--- a/python/lbmpy_walberla/packinfo.py
+++ b/python/lbmpy_walberla/packinfo.py
@@ -6,7 +6,7 @@ from lbmpy.advanced_streaming.communication import _extend_dir
 from pystencils import Assignment, Field, Target
 from pystencils.stencil import inverse_direction
 
-from pystencils_walberla.codegen import comm_directions, generate_pack_info
+from pystencils_walberla.pack_info import _comm_directions, generate_pack_info
 
 
 def generate_lb_pack_info(generation_context,
@@ -65,7 +65,7 @@ def generate_lb_pack_info(generation_context,
             if all(offset == 0 for offset in fa.offsets):
                 continue
             comm_direction = inverse_direction(fa.offsets)
-            for comm_dir in comm_directions(comm_direction):
+            for comm_dir in _comm_directions(comm_direction):
                 common_spec[(comm_dir,)].add(fa.field.center(*fa.index))
 
     full_stencil = LBStencil(Stencil.D3Q27) if stencil.D == 3 else LBStencil(Stencil.D2Q9)
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
new file mode 100644
index 0000000000000000000000000000000000000000..985193f1434dd43d4294067a46ea7ba2ac01dbb3
--- /dev/null
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -0,0 +1,462 @@
+from dataclasses import replace
+from itertools import product
+
+import numpy as np
+import sympy as sp
+
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
+from pystencils import Assignment, CreateKernelConfig, create_kernel, Field, FieldType, fields, Target
+from pystencils.stencil import offset_to_direction_string
+from pystencils.typing import TypedSymbol
+from pystencils.stencil import inverse_direction
+from pystencils.bit_masks import flag_cond
+
+from lbmpy.advanced_streaming import get_accessor, is_inplace, get_timesteps, Timestep
+from lbmpy.advanced_streaming.communication import _extend_dir
+from lbmpy.enums import Stencil
+from lbmpy.stencils import LBStencil
+
+from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from pystencils_walberla.utility import config_from_context
+
+from lbmpy_walberla.alternating_sweeps import EvenIntegerCondition
+from lbmpy_walberla.utility import timestep_suffix
+
+
+def generate_packing_kernels(generation_context, class_name: str, stencil: LBStencil, streaming_pattern: str = 'pull',
+                             namespace='lbm', nonuniform: bool = False,
+                             target: Target = Target.CPU, data_type=None, cpu_openmp: bool = False,
+                             **create_kernel_params):
+    """Generates and writes a waLBerla packing-kernels class for PDF communication.
+
+    Builds pack/unpack/local-copy kernel families via :class:`PackingKernelsCodegen`,
+    renders the ``PackingKernels`` (or ``NonuniformPackingKernels``) jinja templates,
+    and writes ``<class_name>.h`` plus a ``.cpp`` (CPU) or ``.cu`` (GPU) source file
+    through ``generation_context``.
+
+    Args:
+        generation_context: pystencils-walberla generation context used to emit files.
+        class_name: name of the generated C++ class and of the output files.
+        stencil: LB stencil the pack/unpack kernels are created for.
+        streaming_pattern: lbmpy streaming pattern; in-place patterns additionally
+            produce even/odd timestep kernel variants.
+        namespace: C++ namespace the generated class is placed in.
+        nonuniform: if True, also generate kernels for grid-refinement communication.
+        target: Target.CPU or Target.GPU (selects the source file extension).
+        data_type: default data type forwarded to the kernel configuration.
+        cpu_openmp: enable OpenMP parallelization in the generated CPU kernels.
+        **create_kernel_params: further pystencils kernel configuration options.
+    """
+
+    config = config_from_context(generation_context, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                                 **create_kernel_params)
+
+    # Packing kernels should never be vectorised
+    config = replace(config, cpu_vectorize_info=None)
+
+    # The templates only distinguish float vs double for the buffer element type.
+    default_dtype = config.data_type.default_factory()
+    is_float = True if issubclass(default_dtype.numpy_dtype.type, np.float32) else False
+
+    cg = PackingKernelsCodegen(stencil, streaming_pattern, class_name, config)
+
+    kernels = cg.create_uniform_kernel_families()
+
+    if nonuniform:
+        # Extend the uniform kernel dict with redistribution/coalescence kernels.
+        kernels = cg.create_nonuniform_kernel_families(kernels_dict=kernels)
+
+    jinja_context = {
+        'class_name': class_name,
+        'namespace': namespace,
+        'nonuniform': nonuniform,
+        'target': target.name.lower(),
+        'dtype': "float" if is_float else "double",
+        'is_gpu': target == Target.GPU,
+        'kernels': kernels,
+        'inplace': is_inplace(streaming_pattern),
+        'direction_sizes': cg.get_direction_sizes(),
+        'stencil_size': stencil.Q,
+        'dimension': stencil.D,
+        'src_field': cg.src_field,
+        'dst_field': cg.dst_field
+    }
+
+    if nonuniform:
+        # The mask field is only referenced by the nonuniform (refinement) templates.
+        jinja_context['mask_field'] = cg.mask_field
+
+    template_name = "NonuniformPackingKernels" if nonuniform else "PackingKernels"
+
+    env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+    header = env.get_template(f"{template_name}.tmpl.h").render(**jinja_context)
+    source = env.get_template(f"{template_name}.tmpl.cpp").render(**jinja_context)
+
+    source_extension = "cpp" if target == Target.CPU else "cu"
+    generation_context.write_file(f"{class_name}.h", header)
+    generation_context.write_file(f"{class_name}.{source_extension}", source)
+
+
+#   ------------------------------ INTERNAL ----------------------------------------------------------------------------
+
+class PackingKernelsCodegen:
+    """Creates pystencils kernel families for packing/unpacking LB PDF fields.
+
+    For the given stencil and streaming pattern, this class builds kernels that
+    copy PDF values between fields and linear communication buffers: either all
+    stencil values per cell ("All" kernels) or only the populations streaming
+    across a given communication direction ("Direction" kernels). For in-place
+    streaming patterns, separate even/odd timestep kernels are generated and
+    wrapped in an ``EvenIntegerCondition``; otherwise a single ``Timestep.BOTH``
+    kernel suffices. The nonuniform kernels additionally support grid refinement
+    (unpack-redistribute for coarse-to-fine, partial coalescence for fine-to-coarse).
+    """
+
+    def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelConfig):
+        self.stencil = stencil
+        self.dim = stencil.D
+        self.values_per_cell = stencil.Q
+        # Full neighborhood stencil (D2Q9 / D3Q27) enumerates all communication directions.
+        self.full_stencil = LBStencil(Stencil.D3Q27) if self.dim == 3 else LBStencil(Stencil.D2Q9)
+        self.streaming_pattern = streaming_pattern
+        self.inplace = is_inplace(streaming_pattern)
+        self.class_name = class_name
+        self.config = config
+        self.data_type = config.data_type['pdfs'].numpy_dtype
+
+        self.src_field, self.dst_field = fields(
+            f'pdfs_src({self.values_per_cell}), pdfs_dst({self.values_per_cell}) :{self.data_type}[{self.dim}D]')
+        # One accessor per timestep parity; a single entry for non-inplace patterns.
+        self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)]
+        # Flag field read by the partial-coalescence kernels (nonuniform grids only).
+        self.mask_field = fields(f'mask : uint32 [{self.dim}D]')
+
+    def create_uniform_kernel_families(self, kernels_dict=None):
+        """Returns a dict of the kernel families needed for uniform-grid communication."""
+        kernels = dict() if kernels_dict is None else kernels_dict
+
+        kernels['packAll'] = self.get_pack_all_kernel_family()
+        kernels['unpackAll'] = self.get_unpack_all_kernel_family()
+        kernels['localCopyAll'] = self.get_local_copy_all_kernel_family()
+
+        kernels['packDirection'] = self.get_pack_direction_kernel_family()
+        kernels['unpackDirection'] = self.get_unpack_direction_kernel_family()
+        kernels['localCopyDirection'] = self.get_local_copy_direction_kernel_family()
+        return kernels
+
+    def create_nonuniform_kernel_families(self, kernels_dict=None):
+        """Adds the grid-refinement kernel families to ``kernels_dict`` and returns it."""
+        kernels = dict() if kernels_dict is None else kernels_dict
+        kernels['unpackRedistribute'] = self.get_unpack_redistribute_kernel_family()
+        kernels['packPartialCoalescence'] = self.get_pack_partial_coalescence_kernel_family()
+        kernels['zeroCoalescenceRegion'] = self.get_zero_coalescence_region_kernel_family()
+        kernels['unpackCoalescence'] = self.get_unpack_coalescence_kernel_family()
+
+        return kernels
+
+    # --------------------------- Pack / Unpack / LocalCopy All --------------------------------------------------------
+
+    def get_pack_all_ast(self, timestep):
+        """AST copying all Q stream-out values of each cell into the buffer."""
+        config = replace(self.config, ghost_layers=0)
+
+        buffer = self._buffer(self.values_per_cell)
+        src, _ = self._stream_out_accs(timestep)
+        assignments = [Assignment(buffer(i), src[i]) for i in range(self.values_per_cell)]
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = 'pack_ALL' + timestep_suffix(timestep)
+        return ast
+
+    def get_pack_all_kernel_family(self):
+        # In-place patterns need distinct even/odd kernels selected by the timestep parity.
+        if not self.inplace:
+            tree = KernelCallNode(self.get_pack_all_ast(Timestep.BOTH))
+        else:
+            even_call = KernelCallNode(self.get_pack_all_ast(Timestep.EVEN))
+            odd_call = KernelCallNode(self.get_pack_all_ast(Timestep.ODD))
+            tree = EvenIntegerCondition('timestep', even_call, odd_call, parameter_dtype=np.uint8)
+        return KernelFamily(tree, self.class_name)
+
+    def get_unpack_all_ast(self, timestep):
+        """AST writing all Q buffer values back into the stream-out positions."""
+        config = replace(self.config, ghost_layers=0)
+
+        buffer = self._buffer(self.values_per_cell)
+        _, dst = self._stream_out_accs(timestep)
+        assignments = [Assignment(dst[i], buffer(i)) for i in range(self.values_per_cell)]
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = 'unpack_ALL' + timestep_suffix(timestep)
+        return ast
+
+    def get_unpack_all_kernel_family(self):
+        if not self.inplace:
+            tree = KernelCallNode(self.get_unpack_all_ast(Timestep.BOTH))
+        else:
+            even_call = KernelCallNode(self.get_unpack_all_ast(Timestep.EVEN))
+            odd_call = KernelCallNode(self.get_unpack_all_ast(Timestep.ODD))
+            tree = EvenIntegerCondition('timestep', even_call, odd_call, parameter_dtype=np.uint8)
+        return KernelFamily(tree, self.class_name)
+
+    def get_local_copy_all_ast(self, timestep):
+        """AST copying all Q stream-out values directly from src to dst field."""
+        config = replace(self.config, ghost_layers=0)
+
+        src, dst = self._stream_out_accs(timestep)
+        assignments = [Assignment(dst[i], src[i]) for i in range(self.values_per_cell)]
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = 'localCopy_ALL' + timestep_suffix(timestep)
+        return ast
+
+    def get_local_copy_all_kernel_family(self):
+        if not self.inplace:
+            tree = KernelCallNode(self.get_local_copy_all_ast(Timestep.BOTH))
+        else:
+            even_call = KernelCallNode(self.get_local_copy_all_ast(Timestep.EVEN))
+            odd_call = KernelCallNode(self.get_local_copy_all_ast(Timestep.ODD))
+            tree = EvenIntegerCondition('timestep', even_call, odd_call, parameter_dtype=np.uint8)
+        return KernelFamily(tree, self.class_name)
+
+    # --------------------------- Pack / Unpack / LocalCopy Direction --------------------------------------------------
+
+    def get_pack_direction_ast(self, comm_dir, timestep):
+        """AST packing only the populations streaming across ``comm_dir``.
+
+        Returns None if no stencil direction streams across ``comm_dir``.
+        """
+        config = replace(self.config, ghost_layers=0)
+
+        assert not all(d == 0 for d in comm_dir)
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(comm_dir)
+        buffer = self._buffer(len(streaming_dirs))
+        src, _ = self._stream_out_accs(timestep)
+        assignments = []
+        # Sort indices so buffer layout matches the unpacking kernel on the receiver side.
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        for i, d in enumerate(dir_indices):
+            assignments.append(Assignment(buffer(i), src[d]))
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'pack_{dir_string}' + timestep_suffix(timestep)
+        return ast
+
+    def get_pack_direction_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_pack_direction_ast)
+
+    def get_unpack_direction_ast(self, comm_dir, timestep):
+        """AST unpacking the populations that stream across the inverse of ``comm_dir``.
+
+        Returns None if no stencil direction streams across that direction.
+        """
+        config = replace(self.config, ghost_layers=0)
+
+        assert not all(d == 0 for d in comm_dir)
+        dir_string = offset_to_direction_string(comm_dir)
+        # The receiver unpacks what the sender packed for the opposite direction.
+        streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))
+        buffer = self._buffer(len(streaming_dirs))
+        _, dst = self._stream_out_accs(timestep)
+        assignments = []
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        for i, d in enumerate(dir_indices):
+            assignments.append(Assignment(dst[d], buffer(i)))
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'unpack_{dir_string}' + timestep_suffix(timestep)
+        return ast
+
+    def get_unpack_direction_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_unpack_direction_ast)
+
+    def get_local_copy_direction_ast(self, comm_dir, timestep):
+        """AST copying the populations streaming across ``comm_dir`` from src to dst field."""
+        config = replace(self.config, ghost_layers=0)
+
+        assert not all(d == 0 for d in comm_dir)
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(comm_dir)
+        src, dst = self._stream_out_accs(timestep)
+        assignments = []
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        for direction in dir_indices:
+            assignments.append(Assignment(dst[direction], src[direction]))
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'localCopy_{dir_string}' + timestep_suffix(timestep)
+        return ast
+
+    def get_local_copy_direction_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_local_copy_direction_ast)
+
+    # --------------------------- Pack / Unpack / LocalCopy Coarse to Fine ---------------------------------------------
+
+    def get_unpack_redistribute_ast(self, comm_dir, timestep):
+        """AST scattering coarse-grid buffer values onto the finer grid's ghost layers.
+
+        Iterates only every second cell (iteration slice with stride 2) and writes
+        each buffer value to a neighborhood of shifted cells, which is why
+        ``allow_double_writes`` is enabled in the kernel config.
+        """
+        assert not all(d == 0 for d in comm_dir)
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        buffer = self._buffer(self.values_per_cell)
+        _, dst = self._stream_out_accs(timestep)
+        orthos = self.orthogonal_principals(comm_dir)
+        sub_dirs = self.contained_principals(comm_dir)
+        orthogonal_combinations = self.linear_combinations(orthos)
+        subdir_combinations = self.linear_combinations_nozero(sub_dirs)
+        # Offsets reaching into the second ghost layer along the communication direction.
+        second_gl_dirs = [o + s for o, s in product(orthogonal_combinations, subdir_combinations)]
+        # Shift write targets by one cell for each negative comm_dir component.
+        negative_dir_correction = np.array([(1 if d == -1 else 0) for d in comm_dir])
+        assignments = []
+        # First ghost layer: all Q values are written.
+        for offset in orthogonal_combinations:
+            o = offset + negative_dir_correction
+            for d in range(self.values_per_cell):
+                field_acc = dst[d].get_shifted(*o)
+                assignments.append(Assignment(field_acc, buffer(d)))
+
+        # Second ghost layer: only the directions streaming across comm_dir are written.
+        for offset in second_gl_dirs:
+            o = offset + negative_dir_correction
+            for d in dir_indices:
+                field_acc = dst[d].get_shifted(*o)
+                assignments.append(Assignment(field_acc, buffer(d)))
+
+        function_name = f'unpackRedistribute_{dir_string}' + timestep_suffix(timestep)
+        iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))
+        config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice,
+                                    data_type=self.data_type, ghost_layers=0, allow_double_writes=True,
+                                    cpu_openmp=self.config.cpu_openmp, target=self.config.target)
+
+        return create_kernel(assignments, config=config)
+
+    def get_unpack_redistribute_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast)
+
+    def get_local_copy_redistribute_ast(self, comm_dir, timestep):
+        #   TODO
+        raise NotImplementedError()
+
+    def get_local_copy_redistribute_kernel_family(self):
+        #   TODO
+        raise NotImplementedError()
+
+    # --------------------------- Pack / Unpack / LocalCopy Fine to Coarse ---------------------------------------------
+
+    def get_pack_partial_coalescence_ast(self, comm_dir, timestep):
+        """AST summing masked fine-grid populations of each 2^d cell block into the buffer.
+
+        NOTE(review): uses the *next* timestep's read accessors (``timestep.next()``) —
+        presumably because coalescence happens after streaming; confirm against the
+        nonuniform communication scheme.
+        """
+        assert not all(d == 0 for d in comm_dir)
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(comm_dir)
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        buffer = self._buffer(self.values_per_cell)
+        src, _ = self._stream_in_accs(timestep.next())
+        mask = self.mask_field
+
+        # All corner offsets of one fine-cell block, e.g. (0,0), (0,1), (1,0), (1,1) in 2D.
+        offsets = list(product(*((0, 1) for _ in comm_dir)))
+        assignments = []
+        for i, d in enumerate(dir_indices):
+            acc = 0
+            for o in offsets:
+                # Only accumulate values whose mask flag for direction d is set.
+                acc += flag_cond(d, mask[o], src[d].get_shifted(*o))
+            assignments.append(Assignment(buffer(i), acc))
+
+        iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))
+        config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0)
+
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'packPartialCoalescence_{dir_string}' + timestep_suffix(timestep)
+        return ast
+
+    def get_pack_partial_coalescence_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast)
+
+    def get_unpack_coalescence_ast(self, comm_dir, timestep):
+        """AST adding buffer values, scaled by 1/2^d, onto the coarse-grid populations."""
+        config = replace(self.config, ghost_layers=0)
+
+        assert not all(d == 0 for d in comm_dir)
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        buffer = self._buffer(self.values_per_cell)
+        _, dst = self._stream_in_accs(timestep.next())
+
+        # Average over the 2^d fine cells coalescing into one coarse cell.
+        coalescence_factor = sp.Rational(1, 2 ** self.dim)
+
+        assignments = []
+        for i, d in enumerate(dir_indices):
+            assignments.append(Assignment(dst[d], dst[d] + coalescence_factor * buffer(i)))
+
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'unpackCoalescence_{dir_string}' + timestep_suffix(timestep)
+        return ast
+
+    def get_unpack_coalescence_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_unpack_coalescence_ast)
+
+    def get_zero_coalescence_region_ast(self, comm_dir, timestep):
+        """AST zeroing the populations that will later be accumulated by coalescence."""
+        config = replace(self.config, ghost_layers=0)
+
+        dir_string = offset_to_direction_string(comm_dir)
+        streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))
+        dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+        if len(dir_indices) == 0:
+            return None
+        _, dst = self._stream_in_accs(timestep.next())
+
+        assignments = []
+        # NOTE(review): loop index i is unused here; enumerate() could be dropped.
+        for i, d in enumerate(dir_indices):
+            assignments.append(Assignment(dst[d], 0.0))
+
+        ast = create_kernel(assignments, config=config)
+        ast.function_name = f'zeroCoalescenceRegion_{dir_string}' + timestep_suffix(timestep)
+        return ast
+
+    def get_zero_coalescence_region_kernel_family(self):
+        return self._construct_directionwise_kernel_family(self.get_zero_coalescence_region_ast)
+
+    #   TODO
+    def get_local_copy_partial_coalescence_ast(self, comm_dir, timestep):
+        raise NotImplementedError()
+
+    def get_local_copy_partial_coalescence_kernel_family(self):
+        raise NotImplementedError()
+
+    # ------------------------------------------ Utility ---------------------------------------------------------------
+
+    def get_streaming_dirs(self, comm_dir):
+        """Returns the stencil directions that stream across the given communication direction."""
+        if all(d == 0 for d in comm_dir):
+            return set()
+        else:
+            return set(_extend_dir(comm_dir)) & set(self.stencil)
+
+    def get_direction_sizes(self):
+        """Returns, per full-stencil direction, how many PDF values are communicated."""
+        return [len(self.get_streaming_dirs(d)) for d in self.full_stencil]
+
+    def principal(self, i):
+        """Returns the i-th unit vector of the domain's dimension."""
+        e_i = np.zeros(self.dim, dtype=int)
+        e_i[i] = 1
+        return e_i
+
+    def principals(self):
+        """Returns the principal directions for the given dimension"""
+        return tuple(self.principal(i) for i in range(self.dim))
+
+    def orthogonal_principals(self, comm_dir):
+        """Returns the positive principal directions orthogonal to the comm_dir"""
+        return tuple(p for i, p in enumerate(self.principals()) if comm_dir[i] == 0)
+
+    def contained_principals(self, comm_dir):
+        """Returns the (positive or negative) principal directions contained in comm_dir"""
+        vecs = []
+        for i, d in enumerate(comm_dir):
+            if d != 0:
+                vecs.append(d * self.principal(i))
+        return vecs
+
+    def linear_combinations(self, vectors):
+        """Returns all 0/1-coefficient linear combinations of ``vectors``, including zero."""
+        if not vectors:
+            return [np.zeros(self.dim, dtype=int)]
+        else:
+            rest = self.linear_combinations(vectors[1:])
+            return rest + [vectors[0] + r for r in rest]
+
+    def linear_combinations_nozero(self, vectors):
+        """Returns all 0/1-coefficient combinations of ``vectors`` except the zero vector.
+
+        NOTE(review): assumes ``vectors`` is non-empty — an empty list would recurse
+        forever. Callers guarantee this via the ``comm_dir != 0`` assertion.
+        """
+        if len(vectors) == 1:
+            return [vectors[0]]
+        else:
+            rest = self.linear_combinations_nozero(vectors[1:])
+            return rest + [vectors[0]] + [vectors[0] + r for r in rest]
+
+    # --------------------------- Private Members ----------------------------------------------------------------------
+
+    def _construct_directionwise_kernel_family(self, create_ast_callback):
+        """Wraps per-direction kernels into a switch over ``stencil::Direction``.
+
+        Builds one SwitchNode per timestep parity; for in-place streaming patterns
+        the two switches are selected by an EvenIntegerCondition on 'timestep'.
+        Directions for which the callback returns None are omitted from the switch.
+        """
+        subtrees = []
+        direction_symbol = TypedSymbol('dir', dtype='stencil::Direction')
+        for t in get_timesteps(self.streaming_pattern):
+            cases_dict = dict()
+            for comm_dir in self.full_stencil:
+                if all(d == 0 for d in comm_dir):
+                    continue
+                dir_string = offset_to_direction_string(comm_dir)
+                ast = create_ast_callback(comm_dir, t)
+                if ast is None:
+                    continue
+                kernel_call = KernelCallNode(ast)
+                cases_dict[f"stencil::{dir_string}"] = kernel_call
+            subtrees.append(SwitchNode(direction_symbol, cases_dict))
+
+        if not self.inplace:
+            tree = subtrees[0]
+        else:
+            tree = EvenIntegerCondition('timestep', subtrees[Timestep.EVEN.idx], subtrees[Timestep.ODD.idx],
+                                        parameter_dtype=np.uint8)
+        return KernelFamily(tree, self.class_name)
+
+    def _stream_out_accs(self, timestep):
+        """Returns the write (stream-out) accessors of src and dst field for ``timestep``."""
+        accessor = self.accessors[timestep.idx]
+        src_stream_out_accs = accessor.write(self.src_field, self.stencil)
+        dst_stream_out_accs = accessor.write(self.dst_field, self.stencil)
+        return src_stream_out_accs, dst_stream_out_accs
+
+    def _stream_in_accs(self, timestep):
+        """Returns the read (stream-in) accessors of src and dst field for ``timestep``."""
+        accessor = self.accessors[timestep.idx]
+        src_stream_in_accs = accessor.read(self.src_field, self.stencil)
+        dst_stream_in_accs = accessor.read(self.dst_field, self.stencil)
+        return src_stream_in_accs, dst_stream_in_accs
+
+    def _buffer(self, size):
+        """Returns a 1D generic buffer field with ``size`` values per cell."""
+        return Field.create_generic('buffer', spatial_dimensions=1, field_type=FieldType.BUFFER,
+                                    dtype=self.data_type,
+                                    index_shape=(size,))
diff --git a/python/lbmpy_walberla/storage_specification.py b/python/lbmpy_walberla/storage_specification.py
new file mode 100644
index 0000000000000000000000000000000000000000..c113604381be85e3895e3744285a528d4786f84e
--- /dev/null
+++ b/python/lbmpy_walberla/storage_specification.py
@@ -0,0 +1,88 @@
+# import warnings
+
+from dataclasses import replace
+from jinja2 import Environment, PackageLoader, StrictUndefined
+import numpy as np
+
+from pystencils import Target
+
+from lbmpy import LBMConfig
+from lbmpy.advanced_streaming import is_inplace
+from lbmpy.methods import AbstractLbMethod
+
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from pystencils_walberla.utility import config_from_context
+from lbmpy_walberla.packing_kernels import PackingKernelsCodegen
+
+
+def generate_lbm_storage_specification(generation_context, class_name: str,
+                                       method: AbstractLbMethod, lbm_config: LBMConfig, nonuniform: bool = False,
+                                       target: Target = Target.CPU, data_type=None, cpu_openmp: bool = False,
+                                       **create_kernel_params):
+    """Generates and writes a waLBerla LBM storage specification class for ``method``.
+
+    Combines stencil/equilibrium metadata of the LB method with the packing kernel
+    families from :class:`PackingKernelsCodegen`, renders the
+    ``LbmStorageSpecification`` jinja templates, and writes ``<class_name>.h`` plus
+    a ``.cpp`` (CPU) or ``.cu`` (GPU) source file through ``generation_context``.
+
+    Raises:
+        ValueError: if the method's stencil has no name known to waLBerla.
+    """
+    namespace = "lbm"
+    stencil = method.stencil
+    stencil_name = stencil.name
+    streaming_pattern = lbm_config.streaming_pattern
+
+    config = config_from_context(generation_context, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                                 **create_kernel_params)
+
+    # Packing kernels should never be vectorised
+    config = replace(config, cpu_vectorize_info=None)
+
+    # The templates only distinguish float vs double for the buffer element type.
+    default_dtype = config.data_type.default_factory()
+    is_float = True if issubclass(default_dtype.numpy_dtype.type, np.float32) else False
+
+    cg = PackingKernelsCodegen(stencil, streaming_pattern, class_name, config)
+    kernels = cg.create_uniform_kernel_families()
+
+    if nonuniform:
+        kernels = cg.create_nonuniform_kernel_families(kernels_dict=kernels)
+
+    # NOTE(review): values_per_cell and dimension are computed but unused below —
+    # the jinja context uses stencil.Q and stencil.D directly.
+    values_per_cell = len(stencil)
+    dimension = len(stencil[0])
+
+    # Pure storage specification
+    if not stencil_name:
+        raise ValueError("lb_method uses a stencil that is not supported in waLBerla")
+
+    # For D3Q15 the full D3Q27 stencil is used for communication.
+    communication_stencil_name = stencil_name if stencil_name != "D3Q15" else "D3Q27"
+
+    cqc = method.conserved_quantity_computation
+    equilibrium = method.equilibrium_distribution
+
+    jinja_context = {
+        'class_name': class_name,
+        'namespace': namespace,
+        'stencil_name': stencil_name,
+        'communication_stencil_name': communication_stencil_name,
+        'compressible': cqc.compressible,
+        'equilibriumAccuracyOrder': equilibrium.order,
+        'inplace': is_inplace(streaming_pattern),
+        'zero_centered': cqc.zero_centered_pdfs,
+        'eq_deviation_only': equilibrium.deviation_only,
+
+        'nonuniform': nonuniform,
+        'target': target.name.lower(),
+        'dtype': "float" if is_float else "double",
+        'is_gpu': target == Target.GPU,
+        'kernels': kernels,
+        'direction_sizes': cg.get_direction_sizes(),
+        'stencil_size': stencil.Q,
+        'dimension': stencil.D,
+        'src_field': cg.src_field,
+        'dst_field': cg.dst_field
+
+    }
+    if nonuniform:
+        # The mask field is only referenced by the nonuniform (refinement) templates.
+        jinja_context['mask_field'] = cg.mask_field
+
+    env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+
+    header = env.get_template('LbmStorageSpecification.tmpl.h').render(**jinja_context)
+    source = env.get_template('LbmStorageSpecification.tmpl.cpp').render(**jinja_context)
+
+    source_extension = "cpp" if target == Target.CPU else "cu"
+    generation_context.write_file(f"{class_name}.h", header)
+    generation_context.write_file(f"{class_name}.{source_extension}", source)
diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py
new file mode 100644
index 0000000000000000000000000000000000000000..8edd0779b328de768cba4a3acb5f04bdb6bb3acf
--- /dev/null
+++ b/python/lbmpy_walberla/sweep_collection.py
@@ -0,0 +1,230 @@
+from dataclasses import replace
+from typing import Dict
+
+import sympy as sp
+import numpy as np
+
+from pystencils import Target, create_kernel
+from pystencils.config import CreateKernelConfig
+from pystencils.field import Field
+
+from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
+from lbmpy.creationfunctions import LbmCollisionRule
+from lbmpy.fieldaccess import CollideOnlyInplaceAccessor
+from lbmpy.macroscopic_value_kernels import macroscopic_values_setter, macroscopic_values_getter
+from lbmpy.updatekernels import create_lbm_kernel, create_stream_only_kernel
+
+from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily
+from pystencils_walberla.utility import config_from_context
+from pystencils_walberla import generate_sweep_collection
+
+from .alternating_sweeps import EvenIntegerCondition
+from .function_generator import kernel_family_function_generator
+
+
def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmCollisionRule,
                                  streaming_pattern='pull',
                                  field_layout='fzyx', refinement_scaling=None,
                                  macroscopic_fields: Dict[str, Field] = None,
                                  target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
                                  max_threads=None,
                                  **create_kernel_params):
    """Generates a waLBerla sweep collection for the given LBM collision rule.

    Emits the streamCollide / collide / stream / streamOnlyNoAdvancement kernel families
    plus a macroscopic-value initialiser and (optionally) a macroscopic-value getter.

    Args:
        ctx: code generation context the files are written through
        class_name: name of the generated C++ class
        collision_rule: lbmpy collision rule the kernels are derived from
        streaming_pattern: lbmpy streaming pattern ('pull', 'push', 'aa', 'esotwist', ...)
        field_layout: memory layout of the pdf field ('fzyx' or 'zyxf')
        refinement_scaling: optional scaling description forwarded to generate_sweep_collection
        macroscopic_fields: optional mapping that may contain 'density' and/or 'velocity'
            output fields; may be None
        target: pystencils Target (CPU or GPU)
        max_threads: optional thread limit forwarded to the generated kernels
    """
    config = config_from_context(ctx, target=target, data_type=data_type,
                                 cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info, **create_kernel_params)

    # Normalise the None default so the setter/getter factories below can call `.get` safely.
    if macroscopic_fields is None:
        macroscopic_fields = {}

    # usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial
    # coordinates should be ordered in reverse direction i.e. zyx
    lb_method = collision_rule.method

    q = lb_method.stencil.Q
    dim = lb_method.stencil.D

    # Unit inner stride can only be assumed when the index dimension is the outermost one.
    if field_layout == 'fzyx':
        config.cpu_vectorize_info['assume_inner_stride_one'] = True
    elif field_layout == 'zyxf':
        config.cpu_vectorize_info['assume_inner_stride_one'] = False

    src_field = Field.create_generic('pdfs', dim, config.data_type['pdfs'].numpy_dtype,
                                     index_dimensions=1, layout=field_layout, index_shape=(q,))
    if is_inplace(streaming_pattern):
        # In-place streaming patterns read and write the same pdf field.
        dst_field = src_field
    else:
        dst_field = Field.create_generic('pdfs_tmp', dim, config.data_type['pdfs_tmp'].numpy_dtype,
                                         index_dimensions=1, layout=field_layout,
                                         index_shape=(q,))

    config = replace(config, ghost_layers=0)

    function_generators = []

    def family(name):
        return lbm_kernel_family(class_name, name, collision_rule, streaming_pattern, src_field, dst_field, config)

    def generator(name, kernel_family):
        return kernel_family_function_generator(name, kernel_family, namespace='lbm', max_threads=max_threads)

    function_generators.append(generator('streamCollide', family("streamCollide")))
    function_generators.append(generator('collide', family("collide")))
    function_generators.append(generator('stream', family("stream")))
    function_generators.append(generator('streamOnlyNoAdvancement', family("streamOnlyNoAdvancement")))

    setter_family = get_setter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields, config)
    setter_generator = kernel_family_function_generator('initialise', setter_family,
                                                        namespace='lbm', max_threads=max_threads)
    function_generators.append(setter_generator)

    getter_family = get_getter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields, config)
    # get_getter_family returns None when neither a density nor a velocity output was requested;
    # only emit the getter function in the generated class when there is something to compute.
    if getter_family is not None:
        getter_generator = kernel_family_function_generator('calculateMacroscopicParameters', getter_family,
                                                            namespace='lbm', max_threads=max_threads)
        function_generators.append(getter_generator)

    generate_sweep_collection(ctx, class_name, function_generators, refinement_scaling)
+
+
class RefinementScaling:
    """Collects the names of relaxation-rate symbols that must be rescaled on refined blocks."""

    def __init__(self):
        # Symbol names registered for scaling, in registration order.
        self.scaling_info = []

    def add_standard_relaxation_rate_scaling(self, viscosity_relaxation_rate):
        """Convenience wrapper registering the viscosity relaxation-rate symbol."""
        self.add_scaling(viscosity_relaxation_rate)

    def add_scaling(self, parameter):
        """Registers a sympy symbol for refinement scaling; rejects anything else."""
        if not isinstance(parameter, sp.Symbol):
            raise ValueError("Only pure symbols allowed")
        self.scaling_info.append(parameter.name)
+
+
def lbm_kernel_family(class_name, kernel_name,
                      collision_rule, streaming_pattern, src_field, dst_field, config: CreateKernelConfig):
    # Builds the KernelFamily for one sweep variant: "streamCollide", "collide", "stream" or
    # "streamOnlyNoAdvancement". Raises ValueError for any other kernel_name.
    if kernel_name == "streamCollide":
        def lbm_kernel(field_accessor, lb_stencil):
            return create_lbm_kernel(collision_rule, src_field, dst_field, field_accessor)
        # Fused stream-collide: the generated sweep ping-pongs pdfs <-> pdfs_tmp
        # (two-field patterns only; see the non-inplace branch below).
        advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
        temporary_fields = ['pdfs_tmp']
        field_swaps = [('pdfs', 'pdfs_tmp')]
    elif kernel_name == "collide":
        def lbm_kernel(field_accessor, lb_stencil):
            # Collision is done in place; the passed accessor is deliberately ignored.
            return create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor())
        advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
        temporary_fields = ()
        field_swaps = ()
    elif kernel_name == "stream":
        def lbm_kernel(field_accessor, lb_stencil):
            return create_stream_only_kernel(lb_stencil, src_field, dst_field, field_accessor)
        advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
        temporary_fields = ['pdfs_tmp']
        field_swaps = [('pdfs', 'pdfs_tmp')]
    elif kernel_name == "streamOnlyNoAdvancement":
        def lbm_kernel(field_accessor, lb_stencil):
            return create_stream_only_kernel(lb_stencil, src_field, dst_field, field_accessor)
        # Uses 'getTimestepPlusOne' instead of 'advanceTimestep' — presumably the field's
        # timestep counter is read but not advanced by this variant; confirm in KernelFamily.
        advance_timestep = {"field_name": src_field.name, "function": "getTimestepPlusOne"}
        temporary_fields = ['pdfs_tmp']
        field_swaps = ()
    else:
        raise ValueError(f"kernel name: {kernel_name} is not valid")

    lb_method = collision_rule.method
    stencil = lb_method.stencil

    if is_inplace(streaming_pattern):
        # In-place patterns (aa, esotwist, ...) need one kernel per timestep parity,
        # selected at run time from the field-stored timestep.
        nodes = list()
        for timestep in [Timestep.EVEN, Timestep.ODD]:
            accessor = get_accessor(streaming_pattern, timestep)
            timestep_suffix = str(timestep)

            update_rule = lbm_kernel(accessor, stencil)
            ast = create_kernel(update_rule, config=config)
            ast.function_name = 'kernel_' + kernel_name + timestep_suffix
            ast.assumed_inner_stride_one = config.cpu_vectorize_info['assume_inner_stride_one']
            nodes.append(KernelCallNode(ast))

        # NOTE(review): temporary_fields / field_swaps are unused on this branch —
        # in-place patterns have no ping-pong field, only the even/odd dispatch below.
        tree = EvenIntegerCondition('timestep', nodes[0], nodes[1], parameter_dtype=np.uint8)
        family = KernelFamily(tree, class_name, field_timestep=advance_timestep)
    else:
        # Two-field patterns (pull, push) generate a single kernel valid for both timesteps.
        timestep = Timestep.BOTH
        accessor = get_accessor(streaming_pattern, timestep)

        update_rule = lbm_kernel(accessor, stencil)
        ast = create_kernel(update_rule, config=config)
        ast.function_name = 'kernel_' + kernel_name
        ast.assumed_inner_stride_one = config.cpu_vectorize_info['assume_inner_stride_one']
        node = KernelCallNode(ast)
        family = KernelFamily(node, class_name, temporary_fields=temporary_fields, field_swaps=field_swaps)

    return family
+
+
def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, config: CreateKernelConfig):
    """Creates the KernelFamily initialising the pdf field from macroscopic values.

    Falls back to rho = 1 and u = 0 when no 'density' / 'velocity' entry is present.
    For in-place streaming patterns an even and an odd kernel are generated and selected
    at run time from the field-stored timestep; otherwise a single kernel suffices.
    """
    # Tolerate macroscopic_fields=None (the public entry point's default).
    if macroscopic_fields is None:
        macroscopic_fields = {}
    dim = lb_method.stencil.D
    density = macroscopic_fields.get('density', 1.0)
    velocity = macroscopic_fields.get('velocity', [0.0] * dim)

    # The generated class reads the current timestep from the pdf field's block data.
    get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
    temporary_fields = ()
    field_swaps = ()

    if is_inplace(streaming_pattern):
        nodes = list()
        for timestep in [Timestep.EVEN, Timestep.ODD]:
            timestep_suffix = str(timestep)
            setter = macroscopic_values_setter(lb_method,
                                               density=density, velocity=velocity, pdfs=pdfs,
                                               streaming_pattern=streaming_pattern, previous_timestep=timestep)

            setter_ast = create_kernel(setter, config=config)
            setter_ast.function_name = 'kernel_initialise' + timestep_suffix
            nodes.append(KernelCallNode(setter_ast))
        # Dispatch between even and odd kernel based on the integer timestep.
        tree = EvenIntegerCondition('timestep', nodes[0], nodes[1], parameter_dtype=np.uint8)
        family = KernelFamily(tree, class_name, field_timestep=get_timestep)
    else:
        timestep = Timestep.BOTH
        setter = macroscopic_values_setter(lb_method,
                                           density=density, velocity=velocity, pdfs=pdfs,
                                           streaming_pattern=streaming_pattern, previous_timestep=timestep)

        setter_ast = create_kernel(setter, config=config)
        setter_ast.function_name = 'kernel_initialise'
        node = KernelCallNode(setter_ast)
        family = KernelFamily(node, class_name, temporary_fields=temporary_fields, field_swaps=field_swaps)

    return family
+
+
def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, config: CreateKernelConfig):
    """Creates the KernelFamily computing macroscopic values (density/velocity) from the pdfs.

    Returns None when there is nothing to compute, i.e. when macroscopic_fields is None or
    contains neither a 'density' nor a 'velocity' entry — callers must handle the None.
    """
    # Tolerate macroscopic_fields=None (the public entry point's default).
    if macroscopic_fields is None:
        return None
    density = macroscopic_fields.get('density', None)
    velocity = macroscopic_fields.get('velocity', None)

    if density is None and velocity is None:
        return None

    # The generated class reads the current timestep from the pdf field's block data.
    get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
    temporary_fields = ()
    field_swaps = ()

    if is_inplace(streaming_pattern):
        nodes = list()
        for timestep in [Timestep.EVEN, Timestep.ODD]:
            timestep_suffix = str(timestep)
            getter = macroscopic_values_getter(lb_method,
                                               density=density, velocity=velocity, pdfs=pdfs,
                                               streaming_pattern=streaming_pattern, previous_timestep=timestep)

            getter_ast = create_kernel(getter, config=config)
            getter_ast.function_name = 'kernel_getter' + timestep_suffix
            nodes.append(KernelCallNode(getter_ast))
        # Dispatch between even and odd kernel based on the integer timestep.
        tree = EvenIntegerCondition('timestep', nodes[0], nodes[1], parameter_dtype=np.uint8)
        family = KernelFamily(tree, class_name, field_timestep=get_timestep)
    else:
        timestep = Timestep.BOTH
        getter = macroscopic_values_getter(lb_method,
                                           density=density, velocity=velocity, pdfs=pdfs,
                                           streaming_pattern=streaming_pattern, previous_timestep=timestep)

        getter_ast = create_kernel(getter, config=config)
        getter_ast.function_name = 'kernel_getter'
        node = KernelCallNode(getter_ast)
        family = KernelFamily(node, class_name, temporary_fields=temporary_fields, field_swaps=field_swaps)

    return family
diff --git a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f49137846ba99d60888e7353ac4ff195ade2a84
--- /dev/null
+++ b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
@@ -0,0 +1,108 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "domain_decomposition/IBlock.h"
+
+{% if target is equalto 'gpu' -%}
+#include "gpu/GPUWrapper.h"
+{%- endif %}
+
+{% for include in includes -%}
+#include {{include}}
+{% endfor %}
+
+
+namespace walberla{
+namespace {{namespace}} {
+
// Aggregates all generated boundary condition objects of one setup behind a single interface.
// Each boundary's index vectors are filled once from the flag field in the constructor; the
// run/inner/outer members then apply every boundary to a block in registration order.
template <typename FlagField_T>
class {{class_name}}
{
 public:
   // Selects which part of a block a sweep returned by getSweep() operates on.
   enum Type { ALL = 0, INNER = 1, OUTER = 2 };


   // Constructs every boundary object and fills its index vectors from the flag field.
   // domainUID marks the fluid cells; each boundary is matched against its own flag UID below.
   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])]] | type_identifier_list -}} )
      : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
   {
      {% for object_name, boundary_class, kernel in zip(object_names, boundary_classes, kernel_list) -%}

      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'pdfs', 'timestep', 'gpuStream'])]] | type_identifier_list -}});
      {% endfor %}

      {% for object_name, flag_uid in zip(object_names, flag_uids) -%}
      {{object_name}}->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("{{flag_uid}}"), domainUID);
      {% endfor %}
   }

   // Applies every boundary to the whole block.
   void run ({{- ["IBlock * block", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
   {
      {% for object_name in object_names -%}
      {{object_name}}->run({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
      {% endfor %}
   }

   // Applies every boundary to the inner region of the block only.
   void inner ({{- ["IBlock * block", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
   {
      {% for object_name in object_names -%}
      {{object_name}}->inner({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
      {% endfor %}
   }

   // Applies every boundary to the outer (ghost-layer adjacent) region of the block only.
   void outer ({{- ["IBlock * block", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
   {
      {% for object_name in object_names -%}
      {{object_name}}->outer({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
      {% endfor %}
   }

   // Convenience call operator, equivalent to run().
   void operator() ({{- ["IBlock * block", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
   {
      run({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
   }

   // Returns a callable applying all boundaries to the requested region; the lambda captures
   // `this` by pointer, so the collection must outlive the returned sweep.
   std::function<void (IBlock *)> getSweep({{- ["Type type = Type::ALL", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
   {
      switch (type)
      {
      case Type::INNER:
         return [{{- ["this", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}](IBlock* block) { this->inner({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}); };
      case Type::OUTER:
         return [{{- ["this", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}](IBlock* block) { this->outer({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}); };
      default:
         return [{{- ["this", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}](IBlock* block) { this->run({{- ["block", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}); };
      }
   }

   // Non-owning view of the block storage; only stored, not used by the members above.
   weak_ptr< StructuredBlockStorage > blocks_;
   BlockDataID flagID;
   BlockDataID pdfsID;
   walberla::FlagUID domainUID;

   // One shared boundary object per generated boundary class, in template registration order.
   {% for object_name, boundary_class in zip(object_names, boundary_classes) -%}
   shared_ptr<{{boundary_class}}> {{object_name}};
   {% endfor %}
};
+
+}
+}
+
diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp
index dd50337e1714c8abcf49c35b0d77e2e23d4d9c29..17d5bdeb4b5e7443958f9619d08848ae817b9a89 100644
--- a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp
@@ -13,7 +13,8 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \\author Martin Bauer <martin.bauer@fau.de>
+//! \\file {{class_name}}.cpp
+//! \\author lbmpy
 //======================================================================================================================
 
 #include <cmath>
diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
index 677be50025122939d50b10eb7d1381afe519eb4e..5631eec3250d2c1e99d9a59e268e5e1794520757 100644
--- a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
+++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
@@ -13,8 +13,8 @@
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \\author Martin Bauer <martin.bauer@fau.de>
-//
+//! \\file {{class_name}}.h
+//! \\author lbmpy
 //======================================================================================================================
 
 #pragma once
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..91c7d7d960a78552628d3d8568dd611f13c14a2d
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
@@ -0,0 +1,180 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author lbmpy
+//======================================================================================================================
+
+#include "{{class_name}}.h"
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+/*************************************************************************************
+ *                                Kernel Definitions
+*************************************************************************************/
+{{ kernels['packAll']      | generate_definitions }}
+{{ kernels['unpackAll']    | generate_definitions }}
+{{ kernels['localCopyAll'] | generate_definitions }}
+
+{{ kernels['packDirection']      | generate_definitions }}
+{{ kernels['unpackDirection']    | generate_definitions }}
+{{ kernels['localCopyDirection'] | generate_definitions }}
+
+{% if nonuniform -%}
+{{ kernels['unpackRedistribute']    | generate_definitions }}
+{{ kernels['packPartialCoalescence']    | generate_definitions }}
+{{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
+{{ kernels['unpackCoalescence']    | generate_definitions }}
+{%- endif %}
+
+/*************************************************************************************
+ *                                 Kernel Wrappers
+*************************************************************************************/
+
+namespace walberla {
+namespace {{namespace}} {
+
   // Packs all pdfs of the cell interval ci into outBuffer (raw bytes viewed as {{dtype}}).
   void {{class_name}}::PackKernels::packAll(
      {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
             "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
      {{kernels['packAll'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }


   // Unpacks all pdfs from inBuffer into the cell interval ci.
   void {{class_name}}::PackKernels::unpackAll(
      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
             "unsigned char * inBuffer", kernels['unpackAll'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
      {{kernels['unpackAll'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }


   // Same-process copy: all pdfs of srcInterval are copied onto dstInterval (sizes must match).
   void {{class_name}}::PackKernels::localCopyAll(
      {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
             kernels['localCopyAll'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())

      {{kernels['localCopyAll']
               | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
               | indent(6) }}
   }

   // Packs only the populations streaming in directions aligned with the send direction.
   void {{class_name}}::PackKernels::packDirection(
      {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
             "unsigned char * outBuffer", kernels['packDirection'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
      {{kernels['packDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }

   // Unpacks only the populations streaming in directions aligned with the send direction.
   void {{class_name}}::PackKernels::unpackDirection(
      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
             "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
      {{kernels['unpackDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }

   // Same-process directional copy from srcInterval onto dstInterval (sizes must match).
   void {{class_name}}::PackKernels::localCopyDirection(
      {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
             "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
             kernels['localCopyDirection'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())

      {{kernels['localCopyDirection']
          | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
          | indent(6) }}
   }

   {% if nonuniform -%}
   // Nonuniform-grid coarse-to-fine unpack with redistribution.
   // NOTE(review): the signature reuses kernels['unpackDirection'].kernel_selection_parameters
   // instead of kernels['unpackRedistribute'] — presumably both parameter sets are identical,
   // but this looks like a copy-paste; confirm against the header template's declaration.
   void {{class_name}}::PackKernels::unpackRedistribute(
      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
             "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
      {{kernels['unpackRedistribute'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }

   // Fine-to-coarse pack: packs only the populations selected by the mask field.
   void {{class_name}}::PackKernels::packPartialCoalescence(
      {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & ci",
             "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
      {{kernels['packPartialCoalescence'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }

   // Zeroes the coalescence region before fine contributions are accumulated into it.
   void {{class_name}}::PackKernels::zeroCoalescenceRegion(
      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
             kernels['zeroCoalescenceRegion'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{kernels['zeroCoalescenceRegion'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }

   // Fine-to-coarse unpack: accumulates buffered fine-grid contributions into the coarse cells.
   void {{class_name}}::PackKernels::unpackCoalescence(
      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
             "unsigned char * inBuffer", kernels['unpackCoalescence'].kernel_selection_parameters,
             ["gpuStream_t stream"] if is_gpu else []]
          | type_identifier_list -}}
   ) const
   {
      {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
      {{kernels['unpackCoalescence'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
   }
   {%- endif %}
+}  // namespace {{namespace}}
+}  // namespace walberla
\ No newline at end of file
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..866119390412a115f40283c7829cbdfc01e5baad
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
@@ -0,0 +1,255 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/mpi/SendBuffer.h"
+#include "core/mpi/RecvBuffer.h"
+
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+
+#include "stencil/{{stencil_name}}.h"
+#include "stencil/Directions.h"
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+#include "gpu/GPUWrapper.h"
+#include "gpu/GPUField.h"
+{%- endif %}
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla
+{
+namespace {{namespace}}{
+
+class {{class_name}}
+{
+ public:
+   // Used lattice stencil
+   using Stencil = stencil::{{stencil_name}};
+   // Lattice stencil used for the communication (should be used to define which block directions need to be communicated)
+   using CommunicationStencil = stencil::{{communication_stencil_name}};
+
+   // If false, the incompressible correction is applied: "Lattice Boltzmann Model for the Incompressible Navier–Stokes Equation", He et al., 1997
+   static const bool compressible = {% if compressible %}true{% else %}false{% endif %};
+   // Cut off for the lattice Boltzmann equilibrium
+   static const int equilibriumAccuracyOrder = {{equilibriumAccuracyOrder}};
+
+   // If streaming pattern is inplace (esotwist, aa, ...) or not (pull, push)
+   static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
+
+   // If true the background deviation (rho_0 = 1) is subtracted for the collision step.
+   static const bool zeroCenteredPDFs = {% if zero_centered -%} true {%- else -%} false {%- endif -%};
+   // If true the equilibrium is computed in regard to "delta_rho" and not the actual density "rho"
+   static const bool deviationOnlyEquilibrium = {% if eq_deviation_only -%} true {%- else -%} false {%- endif -%};
+
+   // Compute kernels to pack and unpack MPI buffers
+   class PackKernels {
+
+    public:
+      using PdfField_T = {{src_field | field_type(is_gpu=is_gpu)}};
+      using value_type = typename PdfField_T::value_type;
+
+      {% if nonuniform -%}
+      {% if target is equalto 'cpu' -%}
+      using MaskField_T = GhostLayerField< uint32_t, 1 >;
+      {%- elif target is equalto 'gpu' -%}
+      using MaskField_T = gpu::GPUField< uint32_t >;
+      {%- endif %}
+      {%- endif %}
+
+      static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
+
+      /**
+       * Packs all pdfs from the given cell interval to the send buffer.
+       * */
+      void packAll(
+         {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
+                "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Unpacks all pdfs from the send buffer to the given cell interval.
+       * */
+      void unpackAll(
+         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+                "unsigned char * inBuffer", kernels['unpackAll'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Copies data between two blocks on the same process.
+       * All pdfs from the sending interval are copied onto the receiving interval.
+       * */
+      void localCopyAll(
+         {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
+                kernels['localCopyAll'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Packs only those populations streaming in directions aligned with the sending direction dir from the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are packed.
+       * */
+      void packDirection(
+         {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
+                "unsigned char * outBuffer", kernels['packDirection'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Unpacks only those populations streaming in directions aligned with the sending direction dir to the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are unpacked.
+       * */
+      void unpackDirection(
+         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+                "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /** Copies data between two blocks on the same process.
+        * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
+        * */
+      void localCopyDirection(
+         {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
+                kernels['localCopyDirection'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packDirection / unpackDirection
+       * @param ci  The cell interval
+       * @param dir The communication direction
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (CellInterval & ci, stencil::Direction dir) const {
+         return ci.numCells() * sizes[dir] * sizeof(value_type);
+      }
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packAll / unpackAll
+       * @param ci  The cell interval
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (CellInterval & ci) const {
+         return ci.numCells() * {{stencil_size}} * sizeof(value_type);
+      }
+
+      {% if nonuniform -%}
+
+      /**
+       * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid.
+       * */
+      void unpackRedistribute(
+         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+                "unsigned char * inBuffer", kernels['unpackRedistribute'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Partially coalesces and packs populations streaming from a fine block into a coarse block
+       * */
+      void packPartialCoalescence(
+         {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & ci",
+                "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Prepares a coarse block for coalescence by setting every population that must be coalesced from fine blocks to zero.
+       * */
+      void zeroCoalescenceRegion(
+         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+                kernels['zeroCoalescenceRegion'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Unpacks and coalesces populations coming from a fine block onto the coarse grid
+       * */
+      void unpackCoalescence(
+         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+                "unsigned char * inBuffer", kernels['unpackCoalescence'].kernel_selection_parameters,
+                ["gpuStream_t stream = nullptr"] if is_gpu else []]
+             | type_identifier_list -}}
+      ) const;
+
+      /**
+       * Returns the number of bytes that will be unpacked to the cell interval
+       * when using unpackRedistribute. This is 2^{-d} of the data that would be
+       * unpacked during same-level communication.
+       * @param ci  The cell interval
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t redistributeSize(CellInterval & ci) const {
+         return size(ci) >> {{dimension}};
+      }
+
+      /**
+       * Returns the number of bytes that will be packed from the cell interval
+       * when using packPartialCoalescence.
+       * @param ci  The cell interval
+       * @param dir The communication direction
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t partialCoalescenceSize(CellInterval & ci, stencil::Direction dir) const {
+         return size(ci, dir) >> {{dimension}};
+      }
+
+      {%- endif %}
+
+    private:
+      const uint_t sizes[{{direction_sizes|length}}] { {{ direction_sizes | join(', ') }} };
+   };
+
+};
+
+}} //{{namespace}}/walberla
\ No newline at end of file
diff --git a/python/lbmpy_walberla/templates/NonuniformPackingKernels.tmpl.cpp b/python/lbmpy_walberla/templates/NonuniformPackingKernels.tmpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..558dc1faa53a56c281b4cf106f7eb643d0ae9dae
--- /dev/null
+++ b/python/lbmpy_walberla/templates/NonuniformPackingKernels.tmpl.cpp
@@ -0,0 +1,54 @@
+{% extends "PackingKernels.tmpl.cpp" %}
+
+{% block AdditionalKernelDefinitions %}
+{{ kernels['unpackRedistribute']    | generate_definitions }}
+{{ kernels['packPartialCoalescence']    | generate_definitions }}
+{{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
+{{ kernels['unpackCoalescence']    | generate_definitions }}
+{% endblock %}
+
+{% block AdditionalDefinitions %}
+
+void {{class_name}}::unpackRedistribute(
+   {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+   "unsigned char * inBuffer", kernels['unpackRedistribute'].kernel_selection_parameters,
+       ["gpuStream_t stream"] if is_gpu else []]
+   | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
+
+   {{kernels['unpackRedistribute'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+void {{class_name}}::packPartialCoalescence(
+   {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & ci",
+   "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
+       ["gpuStream_t stream"] if is_gpu else []]
+   | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
+
+   {{kernels['packPartialCoalescence'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+void {{class_name}}::zeroCoalescenceRegion(
+   {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+   kernels['zeroCoalescenceRegion'].kernel_selection_parameters,
+       ["gpuStream_t stream"] if is_gpu else []]
+   | type_identifier_list -}}
+) const {
+   {{kernels['zeroCoalescenceRegion'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+void {{class_name}}::unpackCoalescence(
+   {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+   "unsigned char * inBuffer", kernels['unpackCoalescence'].kernel_selection_parameters,
+       ["gpuStream_t stream"] if is_gpu else []]
+   | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
+
+   {{kernels['unpackCoalescence'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+{% endblock %}
diff --git a/python/lbmpy_walberla/templates/NonuniformPackingKernels.tmpl.h b/python/lbmpy_walberla/templates/NonuniformPackingKernels.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..01c99a69577f7928d830d743f9d0aba0b8584ea9
--- /dev/null
+++ b/python/lbmpy_walberla/templates/NonuniformPackingKernels.tmpl.h
@@ -0,0 +1,74 @@
+{% extends "PackingKernels.tmpl.h" %}
+
+{% block AdditionalPublicDeclarations %}
+{% if target is equalto 'cpu' -%}
+   using MaskField_T = GhostLayerField< uint32_t, 1 >;
+{%- elif target is equalto 'gpu' -%}
+   using MaskField_T = gpu::GPUField< uint32_t >;
+{%- endif %}
+
+
+   /**
+   * Unpacks and uniformly redistributes populations coming from a coarse block onto
+   * the fine grid.
+   */
+   void unpackRedistribute(
+      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+      "unsigned char * inBuffer", kernels['unpackRedistribute'].kernel_selection_parameters,
+       ["gpuStream_t stream = nullptr"] if is_gpu else []]
+      | type_identifier_list -}}
+   ) const;
+
+   /**
+   * Partially coalesces and packs populations streaming from a fine block into a coarse block
+   */
+   void packPartialCoalescence(
+      {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & ci",
+      "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
+          ["gpuStream_t stream = nullptr"] if is_gpu else []]
+      | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Prepares a coarse block for coalescence by setting every population that must be coalesced from fine blocks
+    * to zero.
+    */
+   void zeroCoalescenceRegion(
+      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+      kernels['zeroCoalescenceRegion'].kernel_selection_parameters,
+          ["gpuStream_t stream = nullptr"] if is_gpu else []]
+      | type_identifier_list -}}
+   ) const;
+
+   /**
+   * Unpacks and coalesces populations coming from a fine block onto the coarse grid
+   */
+   void unpackCoalescence(
+      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+      "unsigned char * inBuffer", kernels['unpackCoalescence'].kernel_selection_parameters,
+          ["gpuStream_t stream = nullptr"] if is_gpu else []]
+      | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Returns the number of bytes that will be unpacked to the cell interval
+    * when using unpackRedistribute. This is 2^{-d} of the data that would be
+    * unpacked during same-level communication.
+    * @param ci  The cell interval
+    * @return    The required size of the buffer, in bytes
+    */
+   uint_t redistributeSize(CellInterval & ci) const {
+      return size(ci) >> {{dimension}};
+   }
+
+   /**
+    * Returns the number of bytes that will be packed from the cell interval
+    * when using packPartialCoalescence.
+    * @param ci  The cell interval
+    * @param dir The communication direction
+    * @return    The required size of the buffer, in bytes
+    */
+   uint_t partialCoalescenceSize(CellInterval & ci, stencil::Direction dir) const {
+      return size(ci, dir) >> {{dimension}};
+   }
+{% endblock %}
diff --git a/python/lbmpy_walberla/templates/PackingKernels.tmpl.cpp b/python/lbmpy_walberla/templates/PackingKernels.tmpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e536a9f8bbff9bf3fa3c2d907029112d9126053
--- /dev/null
+++ b/python/lbmpy_walberla/templates/PackingKernels.tmpl.cpp
@@ -0,0 +1,137 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author lbmpy
+//======================================================================================================================
+
+#include "{{class_name}}.h"
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+
+/*************************************************************************************
+ *                                Kernel Definitions
+*************************************************************************************/
+//NOLINTBEGIN(readability-non-const-parameter*)
+{{ kernels['packAll']      | generate_definitions }}
+{{ kernels['unpackAll']    | generate_definitions }}
+{{ kernels['localCopyAll'] | generate_definitions }}
+
+{{ kernels['packDirection']      | generate_definitions }}
+{{ kernels['unpackDirection']    | generate_definitions }}
+{{ kernels['localCopyDirection'] | generate_definitions }}
+
+{% block AdditionalKernelDefinitions %}
+{% endblock %}
+//NOLINTEND(readability-non-const-parameter*)
+
+
+/*************************************************************************************
+ *                                 Kernel Wrappers
+*************************************************************************************/
+
+namespace walberla {
+namespace {{namespace}} {
+
+void {{class_name}}::packAll(
+   {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
+          "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
+          ["gpuStream_t stream"] if is_gpu else []]
+       | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
+
+   {{kernels['packAll'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+
+void {{class_name}}::unpackAll(
+   {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+         "unsigned char * inBuffer", kernels['unpackAll'].kernel_selection_parameters,
+          ["gpuStream_t stream"] if is_gpu else []]
+       | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
+
+   {{kernels['unpackAll'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+
+void {{class_name}}::localCopyAll(
+   {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+         "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
+         kernels['localCopyAll'].kernel_selection_parameters,
+          ["gpuStream_t stream"] if is_gpu else []]
+       | type_identifier_list -}}
+) const {
+   WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+   WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+   WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+   {{kernels['localCopyAll']
+     | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+     | indent(3) }}
+}
+
+void {{class_name}}::packDirection(
+   {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
+         "unsigned char * outBuffer", kernels['packDirection'].kernel_selection_parameters,
+          ["gpuStream_t stream"] if is_gpu else []]
+       | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(outBuffer);
+
+   {{kernels['packDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+void {{class_name}}::unpackDirection(
+   {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+         "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
+          ["gpuStream_t stream"] if is_gpu else []]
+       | type_identifier_list -}}
+) const {
+   {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(inBuffer);
+
+   {{kernels['unpackDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(3) }}
+}
+
+void {{class_name}}::localCopyDirection(
+   {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+         "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
+         kernels['localCopyDirection'].kernel_selection_parameters,
+          ["gpuStream_t stream"] if is_gpu else []]
+       | type_identifier_list -}}
+) const {
+   WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+   WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+   WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+   {{kernels['localCopyDirection']
+     | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+     | indent(3) }}
+}
+
+{% block AdditionalDefinitions %}
+{% endblock %}
+
+}  // namespace {{namespace}}
+}  // namespace walberla
\ No newline at end of file
diff --git a/python/lbmpy_walberla/templates/PackingKernels.tmpl.h b/python/lbmpy_walberla/templates/PackingKernels.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..5371e395d948e758efde91dafdb237fdb855aa9c
--- /dev/null
+++ b/python/lbmpy_walberla/templates/PackingKernels.tmpl.h
@@ -0,0 +1,168 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "stencil/Directions.h"
+#include "core/cell/CellInterval.h"
+#include "core/mpi/SendBuffer.h"
+#include "core/mpi/RecvBuffer.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+{% if target is equalto 'gpu' -%}
+#include "gpu/GPUWrapper.h"
+#include "gpu/GPUField.h"
+{%- endif %}
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+{%- endif %}
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla{
+namespace {{namespace}} {
+
+class {{class_name}} {
+
+public:
+   using PdfField_T = {{src_field | field_type(is_gpu=is_gpu)}};
+   using value_type = typename PdfField_T::value_type;
+
+   static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
+
+   /**
+    * Packs all pdfs from the given cell interval to the send buffer.
+    */
+   void packAll(
+      {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
+            "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
+             ["gpuStream_t stream = nullptr"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Unpacks all pdfs from the send buffer to the given cell interval.
+    */
+   void unpackAll(
+      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+            "unsigned char * inBuffer", kernels['unpackAll'].kernel_selection_parameters,
+             ["gpuStream_t stream = nullptr"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Copies data between two blocks on the same process.
+    * All pdfs from the sending interval are copied onto the receiving interval.
+    */
+   void localCopyAll(
+      {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+            "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
+          kernels['localCopyAll'].kernel_selection_parameters,
+             ["gpuStream_t stream = nullptr"] if is_gpu else []]
+      | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Packs only those populations streaming in directions aligned with the sending direction dir from the given
+    * cell interval.
+    * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are packed.
+    */
+   void packDirection(
+      {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
+            "unsigned char * outBuffer", kernels['packDirection'].kernel_selection_parameters,
+             ["gpuStream_t stream = nullptr"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Unpacks only those populations streaming in directions aligned with the sending direction dir to the given
+    * cell interval.
+    * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are unpacked.
+    */
+   void unpackDirection(
+      {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
+            "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
+             ["gpuStream_t stream = nullptr"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Copies data between two blocks on the same process.
+    * PDFs streaming aligned with the direction dir are copied from the sending interval
+    * onto the receiving interval.
+    */
+   void localCopyDirection(
+      {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+            "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
+            kernels['localCopyDirection'].kernel_selection_parameters,
+             ["gpuStream_t stream = nullptr"] if is_gpu else []]
+          | type_identifier_list -}}
+   ) const;
+
+   /**
+    * Returns the number of bytes that will be packed from / unpacked to the cell interval
+    * when using packDirection / unpackDirection
+    * @param ci  The cell interval
+    * @param dir The communication direction
+    * @return    The required size of the buffer, in bytes
+    */
+   uint_t size (CellInterval & ci, stencil::Direction dir) const {
+      return ci.numCells() * sizes[dir] * sizeof(value_type);
+   }
+
+   /**
+    * Returns the number of bytes that will be packed from / unpacked to the cell interval
+    * when using packAll / unpackAll
+    * @param ci  The cell interval
+    * @return    The required size of the buffer, in bytes
+    */
+   uint_t size (CellInterval & ci) const {
+      return ci.numCells() * {{stencil_size}} * sizeof(value_type);
+   }
+
+   {% block AdditionalPublicDeclarations %}
+   {% endblock %}
+
+ private:
+   const uint_t sizes[{{direction_sizes|length}}] { {{ direction_sizes | join(', ') }} };
+};
+
+}  // namespace {{namespace}}
+}  // namespace walberla
diff --git a/python/lbmpy_walberla/utility.py b/python/lbmpy_walberla/utility.py
new file mode 100644
index 0000000000000000000000000000000000000000..1289c381e7b50ac7e83d34fca887e6d659959b92
--- /dev/null
+++ b/python/lbmpy_walberla/utility.py
@@ -0,0 +1,11 @@
+from lbmpy.advanced_streaming import Timestep
+
+
+def timestep_suffix(timestep: Timestep):
+    """ get the suffix as string for a timestep
+
+    :param timestep: instance of class lbmpy.advanced_streaming.Timestep
+    :return: either "even", "odd" or an empty string
+    """
+    return ("_" + str(timestep)) if timestep != Timestep.BOTH else ''
+
diff --git a/python/lbmpy_walberla/walberla_lbm_generation.py b/python/lbmpy_walberla/walberla_lbm_generation.py
index 8566d3915697e28600f54524d7d43e53a98c17b7..e264fb8bbbb8c67040de8c309e40e8b57c0f7053 100644
--- a/python/lbmpy_walberla/walberla_lbm_generation.py
+++ b/python/lbmpy_walberla/walberla_lbm_generation.py
@@ -1,4 +1,6 @@
 # import warnings
+from typing import Callable, List
+
 
 import numpy as np
 import sympy as sp
@@ -18,8 +20,10 @@ from pystencils.node_collection import NodeCollection
 from pystencils.stencil import offset_to_direction_string
 from pystencils.sympyextensions import get_symmetric_part
 from pystencils.typing.transformations import add_types
-from pystencils_walberla.codegen import KernelInfo, config_from_context
+
+from pystencils_walberla.kernel_info import KernelInfo
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from pystencils_walberla.utility import config_from_context
 
 cpp_printer = CustomSympyPrinter()
 REFINEMENT_SCALE_FACTOR = sp.Symbol("level_scale_factor")
@@ -155,7 +159,7 @@ def __lattice_model(generation_context, class_name, config, lb_method, stream_co
     generation_context.write_file(f"{class_name}.cpp", source)
 
 
-def generate_lattice_model(generation_context, class_name, collision_rule, field_layout='zyxf', refinement_scaling=None,
+def generate_lattice_model(generation_context, class_name, collision_rule, field_layout='fzyx', refinement_scaling=None,
                            target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
                            **create_kernel_params):
 
diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py
new file mode 100644
index 0000000000000000000000000000000000000000..e21d6c9613a1c2be87e21cbc06a2a78212f72552
--- /dev/null
+++ b/python/lbmpy_walberla/walberla_lbm_package.py
@@ -0,0 +1,53 @@
+from typing import Callable, List, Dict
+
+from pystencils import Target, Field
+
+from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
+from lbmpy.relaxationrates import get_shear_relaxation_rate
+
+from pystencils_walberla.cmake_integration import CodeGenerationContext
+
+from lbmpy_walberla.boundary_collection import generate_boundary_collection
+from lbmpy_walberla.storage_specification import generate_lbm_storage_specification
+from lbmpy_walberla.sweep_collection import generate_lbm_sweep_collection, RefinementScaling
+
+
+def generate_lbm_package(ctx: CodeGenerationContext, name: str,
+                         collision_rule: LbmCollisionRule,
+                         lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
+                         nonuniform: bool = False, boundaries: List[Callable] = None,
+                         macroscopic_fields: Dict[str, Field] = None,
+                         target: Target = Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
+                         max_threads=None,
+                         **kernel_parameters):
+
+    if macroscopic_fields is None:
+        macroscopic_fields = {}
+
+    method = collision_rule.method
+
+    storage_spec_name = f'{name}StorageSpecification'
+    generate_lbm_storage_specification(ctx, storage_spec_name, method, lbm_config,
+                                       nonuniform=nonuniform, target=target, data_type=data_type)
+
+    if nonuniform:
+        omega = get_shear_relaxation_rate(method)
+        refinement_scaling = RefinementScaling()
+        refinement_scaling.add_standard_relaxation_rate_scaling(omega)
+    else:
+        refinement_scaling = None
+
+    streaming_pattern = lbm_config.streaming_pattern
+    generate_lbm_sweep_collection(ctx, f'{name}SweepCollection', collision_rule,
+                                  streaming_pattern=streaming_pattern,
+                                  field_layout=lbm_optimisation.field_layout,
+                                  refinement_scaling=refinement_scaling,
+                                  macroscopic_fields=macroscopic_fields,
+                                  target=target, data_type=data_type,
+                                  cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info,
+                                  max_threads=max_threads,
+                                  **kernel_parameters)
+
+    generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
+                                 lb_method=method, streaming_pattern=streaming_pattern,
+                                 target=target, layout=lbm_optimisation.field_layout)
diff --git a/python/pystencils_walberla/__init__.py b/python/pystencils_walberla/__init__.py
index 0ea2d02cb4b93fc880f0addc38058e0363e39c8c..f78f7fcf244e7fd140cd2abcc93ebaebaea2f94f 100644
--- a/python/pystencils_walberla/__init__.py
+++ b/python/pystencils_walberla/__init__.py
@@ -1,13 +1,17 @@
 from .boundary import generate_staggered_boundary, generate_staggered_flux_boundary
-from .cmake_integration import CodeGeneration
-from .codegen import (
-    generate_pack_info, generate_pack_info_for_field, generate_pack_info_from_kernel,
-    generate_mpidtype_info_from_kernel, generate_sweep, get_vectorize_instruction_set, generate_selective_sweep,
-    config_from_context)
-from .utility import generate_info_header
+from .cmake_integration import CodeGeneration, ManualCodeGenerationContext
 
-__all__ = ['CodeGeneration',
-           'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info',
-           'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary',
-           'get_vectorize_instruction_set', 'generate_selective_sweep', 'config_from_context',
-           'generate_info_header']
+from .function_generator import function_generator
+from .kernel_info import KernelInfo
+from .sweep import generate_sweep, generate_selective_sweep, generate_sweep_collection
+from .pack_info import (generate_pack_info, generate_pack_info_for_field,
+                        generate_pack_info_from_kernel, generate_mpidtype_info_from_kernel)
+from .utility import generate_info_header, get_vectorize_instruction_set, config_from_context
+
+__all__ = ['generate_staggered_boundary', 'generate_staggered_flux_boundary',
+           'CodeGeneration', 'ManualCodeGenerationContext',
+           'function_generator',
+           'generate_sweep', 'generate_selective_sweep', 'generate_sweep_collection',
+           'generate_pack_info', 'generate_pack_info_for_field', 'generate_pack_info_from_kernel',
+           'generate_mpidtype_info_from_kernel',
+           'generate_info_header', 'get_vectorize_instruction_set', 'config_from_context']
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index 4fc9cf6e517d9b513511530eb05b5dab9eb10edd..c5a5e54c1d00d9d6e476306453eae4320b6f5aa8 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -2,13 +2,10 @@ import numpy as np
 from jinja2 import Environment, PackageLoader, StrictUndefined
 from pystencils import Field, FieldType, Target
 from pystencils.boundaries.boundaryhandling import create_boundary_kernel
-from pystencils.boundaries.createindexlist import (
-    boundary_index_array_coordinate_names, direction_member_name,
-    numpy_data_type_for_boundary_object)
+from pystencils.boundaries.createindexlist import numpy_data_type_for_boundary_object
 from pystencils.typing import TypedSymbol, create_type
-from pystencils.stencil import inverse_direction
 
-from pystencils_walberla.codegen import config_from_context
+from pystencils_walberla.utility import config_from_context, struct_from_numpy_dtype
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
 from pystencils_walberla.additional_data_handler import AdditionalDataHandler
 from pystencils_walberla.kernel_selection import (
@@ -32,6 +29,7 @@ def generate_boundary(generation_context,
                       interface_mappings=(),
                       generate_functor=True,
                       layout='fzyx',
+                      field_timestep=None,
                       **create_kernel_params):
 
     if boundary_object.additional_data and additional_data_handler is None:
@@ -75,8 +73,9 @@ def generate_boundary(generation_context,
     else:
         raise ValueError(f"kernel_creation_function returned wrong type: {kernel.__class__}")
 
-    kernel_family = KernelFamily(selection_tree, class_name)
-    interface_spec = HighLevelInterfaceSpec(kernel_family.kernel_selection_parameters, interface_mappings)
+    kernel_family = KernelFamily(selection_tree, class_name, field_timestep=field_timestep)
+    selection_parameters = kernel_family.kernel_selection_parameters if field_timestep is None else []
+    interface_spec = HighLevelInterfaceSpec(selection_parameters, interface_mappings)
 
     if additional_data_handler is None:
         additional_data_handler = AdditionalDataHandler(stencil=neighbor_stencil)
@@ -97,8 +96,9 @@ def generate_boundary(generation_context,
         'inner_or_boundary': boundary_object.inner_or_boundary,
         'single_link': boundary_object.single_link,
         'additional_data_handler': additional_data_handler,
-        'dtype': "float" if is_float else "double",
-        'layout': layout
+        'dtype': "float" if is_float else "double",
+        'layout': layout,
+        'index_shape': index_shape
     }
 
     env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
@@ -111,6 +111,8 @@ def generate_boundary(generation_context,
     generation_context.write_file(f"{class_name}.h", header)
     generation_context.write_file(f"{class_name}.{source_extension}", source)
 
+    return context
+
 
 def generate_staggered_boundary(generation_context, class_name, boundary_object,
                                 dim, neighbor_stencil, index_shape, target=Target.CPU, **kwargs):
@@ -126,28 +128,3 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob
                       FieldType.STAGGERED_FLUX, target=target, **kwargs)
 
 
-def struct_from_numpy_dtype(struct_name, numpy_dtype):
-    result = f"struct {struct_name} {{ \n"
-
-    equality_compare = []
-    constructor_params = []
-    constructor_initializer_list = []
-    for name, (sub_type, offset) in numpy_dtype.fields.items():
-        pystencils_type = create_type(sub_type)
-        result += f"    {pystencils_type} {name};\n"
-        if name in boundary_index_array_coordinate_names or name == direction_member_name:
-            constructor_params.append(f"{pystencils_type} {name}_")
-            constructor_initializer_list.append(f"{name}({name}_)")
-        else:
-            constructor_initializer_list.append(f"{name}()")
-        if pystencils_type.is_float():
-            equality_compare.append(f"floatIsEqual({name}, o.{name})")
-        else:
-            equality_compare.append(f"{name} == o.{name}")
-
-    result += "    %s(%s) : %s {}\n" % \
-              (struct_name, ", ".join(constructor_params), ", ".join(constructor_initializer_list))
-    result += "    bool operator==(const %s & o) const {\n        return %s;\n    }\n" % \
-              (struct_name, " && ".join(equality_compare))
-    result += "};\n"
-    return result
diff --git a/python/pystencils_walberla/cmake_integration.py b/python/pystencils_walberla/cmake_integration.py
index 932e5ce69dbc8309c8000b53e2fd9a34b21e2f4a..4d5654c08a1474b53852f643c2cf4249a12901db 100644
--- a/python/pystencils_walberla/cmake_integration.py
+++ b/python/pystencils_walberla/cmake_integration.py
@@ -105,6 +105,12 @@ class ManualCodeGenerationContext:
     def write_file(self, name, content):
         self.files[name] = content
 
+    def write_all_files(self):
+        for name, content in self.files.items():
+            with open(name, 'w') as f:
+                f.write(content)
+        self.files = dict()
+
     def __enter__(self):
         return self
 
diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py
index 9e6ada3b86c6d57757d5ca814a481b41a571d0c5..ac475f72c9489d9e7b74ce25d9bf303413ae7834 100644
--- a/python/pystencils_walberla/codegen.py
+++ b/python/pystencils_walberla/codegen.py
@@ -124,7 +124,7 @@ def generate_selective_sweep(generation_context, class_name, selection_tree, int
     elif target != kernel_family.get_ast_attr('target'):
         raise ValueError('Mismatch between target parameter and AST targets.')
 
-    if not (generation_context.cuda or generation_context.hip) and target == Target.GPU:
+    if not generation_context.gpu and target == Target.GPU:
         return
 
     representative_field = {p.field_name for p in kernel_family.parameters if p.is_field_parameter}
diff --git a/python/pystencils_walberla/function_generator.py b/python/pystencils_walberla/function_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c7b60803c59d788ce615ceeeb314b5ece0acbe1
--- /dev/null
+++ b/python/pystencils_walberla/function_generator.py
@@ -0,0 +1,77 @@
+from typing import Sequence, Union
+
+
+from pystencils import Target, Assignment, AssignmentCollection
+from pystencils import create_kernel, create_staggered_kernel
+
+from pystencils_walberla.cmake_integration import CodeGenerationContext
+from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily, HighLevelInterfaceSpec
+from pystencils_walberla.utility import config_from_context
+
+
+def function_generator(ctx: CodeGenerationContext, class_name: str,
+                       assignments: Union[Sequence[Assignment], AssignmentCollection],
+                       namespace: str = 'pystencils', staggered=False, field_swaps=None, varying_parameters=(),
+                       ghost_layers_to_include=0,
+                       target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
+                       max_threads=None,
+                       **create_kernel_params):
+    return lambda: __function_generator(ctx, class_name, assignments,
+                                        namespace, staggered, field_swaps, varying_parameters,
+                                        ghost_layers_to_include,
+                                        target, data_type, cpu_openmp, cpu_vectorize_info, max_threads,
+                                        **create_kernel_params)
+
+
+def __function_generator(ctx: CodeGenerationContext, class_name: str,
+                         assignments: Union[Sequence[Assignment], AssignmentCollection],
+                         namespace: str = 'pystencils', staggered=False, field_swaps=None, varying_parameters=(),
+                         ghost_layers_to_include=0,
+                         target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
+                         max_threads=None,
+                         **create_kernel_params):
+    if staggered:
+        assert 'omp_single_loop' not in create_kernel_params
+
+    create_kernel_params['omp_single_loop'] = False
+    config = config_from_context(ctx, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                                 cpu_vectorize_info=cpu_vectorize_info, **create_kernel_params)
+
+    if not staggered:
+        ast = create_kernel(assignments, config=config)
+    else:
+        # This should not be necessary but create_staggered_kernel does not take a config at the moment ...
+        ast = create_staggered_kernel(assignments, **config.__dict__)
+
+    ast.function_name = class_name.lower()
+
+    all_field_names = [f.name for f in ast.fields_accessed]
+    all_field_names.sort()
+
+    temporary_fields = [f for f in all_field_names if "_tmp" in f]
+
+    if field_swaps is None:
+        field_swaps = []
+        for field_name in all_field_names:
+            if field_name + "_tmp" in temporary_fields:
+                field_swaps.append((field_name, field_name + "_tmp"))
+
+    selection_tree = KernelCallNode(ast)
+    kernel_family = KernelFamily(selection_tree, class_name,
+                                 temporary_fields, field_swaps, varying_parameters)
+
+    representative_field = {p.field_name for p in kernel_family.parameters if p.is_field_parameter}
+    representative_field = sorted(representative_field)[0]
+
+    interface_spec = HighLevelInterfaceSpec(kernel_family.kernel_selection_parameters, ())
+
+    jinja_context = {
+        'kernel': kernel_family,
+        'namespace': namespace,
+        'function_name': class_name,
+        'field': representative_field,
+        'ghost_layers_to_include': ghost_layers_to_include,
+        'interface_spec': interface_spec,
+        'max_threads': max_threads
+    }
+    return jinja_context
diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py
index 61ca6f12ac1eabfc6c1be09aad836b424c34aaee..b2413bcefe8c4a4468e4971c1b8901116519c57d 100644
--- a/python/pystencils_walberla/jinja_filters.py
+++ b/python/pystencils_walberla/jinja_filters.py
@@ -4,6 +4,7 @@ try:
 except ImportError:
     from jinja2 import contextfilter as jinja2_context_decorator
 
+from collections.abc import Iterable
 import sympy as sp
 
 from pystencils import Target, Backend
@@ -43,6 +44,18 @@ delete_loop = """
     }}
 """
 
+standard_parameter_registration = """
+for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++)
+{{
+    const {dtype} level_scale_factor = {dtype}(uint_t(1) << level);
+    const {dtype} one                = {dtype}(1.0);
+    const {dtype} half               = {dtype}(0.5);
+    
+    {name}Vector.push_back( {dtype}({name} / (level_scale_factor * (-{name} * half + one) + {name} * half)) );
+}}
+"""
+
+
 # the target will enter the jinja filters as string. The reason for that is, that is not easy to work with the
 # enum in the template files.
 def translate_target(target):
@@ -61,6 +74,12 @@ def make_field_type(dtype, f_size, is_gpu):
         return f"field::GhostLayerField<{dtype}, {f_size}>"
 
 
+def field_type(field, is_gpu=False):
+    dtype = get_base_type(field.dtype)
+    f_size = get_field_fsize(field)
+    return make_field_type(dtype, f_size, is_gpu)
+
+
 def get_field_fsize(field):
     """Determines the size of the index coordinate. Since walberla fields only support one index dimension,
     pystencils fields with multiple index dimensions are linearized to a single index dimension.
@@ -147,35 +166,30 @@ def field_extraction_code(field, is_temporary, declaration_only=False,
         is_gpu: if the field is a GhostLayerField or a GpuField
         update_member: specify if function is used inside a constructor; add _ to members
     """
-    # Determine size of f coordinate which is a template parameter
-    f_size = get_field_fsize(field)
-    field_name = field.name
-    dtype = get_base_type(field.dtype)
-    field_type = make_field_type(dtype, f_size, is_gpu)
+    wlb_field_type = field_type(field, is_gpu)
 
     if not is_temporary:
-        dtype = get_base_type(field.dtype)
-        field_type = make_field_type(dtype, f_size, is_gpu)
         if declaration_only:
-            return f"{field_type} * {field_name}_;"
+            return f"{wlb_field_type} * {field.name}_;"
         else:
             prefix = "" if no_declaration else "auto "
             if update_member:
-                return f"{prefix}{field_name}_ = block->getData< {field_type} >({field_name}ID);"
+                return f"{prefix}{field.name}_ = block->getData< {wlb_field_type} >({field.name}ID);"
             else:
-                return f"{prefix}{field_name} = block->getData< {field_type} >({field_name}ID);"
+                return f"{prefix}{field.name} = block->getData< {wlb_field_type} >({field.name}ID);"
     else:
-        assert field_name.endswith('_tmp')
-        original_field_name = field_name[:-len('_tmp')]
+        assert field.name.endswith('_tmp')
+        original_field_name = field.name[:-len('_tmp')]
         if declaration_only:
-            return f"{field_type} * {field_name}_;"
+            return f"{wlb_field_type} * {field.name}_;"
         else:
-            declaration = f"{field_type} * {field_name};"
+            declaration = f"{wlb_field_type} * {field.name};"
             tmp_field_str = temporary_fieldTemplate.format(original_field_name=original_field_name,
-                                                           tmp_field_name=field_name, type=field_type)
+                                                           tmp_field_name=field.name, type=wlb_field_type)
             return tmp_field_str if no_declaration else declaration + tmp_field_str
 
 
+# TODO fields are not sorted
 @jinja2_context_decorator
 def generate_block_data_to_field_extraction(ctx, kernel_info, parameters_to_ignore=(), parameters=None,
                                             declarations_only=False, no_declarations=False, update_member=False):
@@ -211,11 +225,22 @@ def generate_block_data_to_field_extraction(ctx, kernel_info, parameters_to_igno
     return result
 
 
-def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignore=(), ignore_fields=False):
+def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignore=(), ignore_fields=False,
+                                        parameter_registration=None):
     symbols = {p.field_name for p in kernel_info.parameters if p.is_field_pointer and not ignore_fields}
     symbols.update(p.symbol.name for p in kernel_info.parameters if not p.is_field_parameter)
     symbols.difference_update(parameters_to_ignore)
-    return "\n".join("auto & %s = %s%s_;" % (s, prefix, s) for s in symbols)
+    type_information = {p.symbol.name: p.symbol.dtype for p in kernel_info.parameters if not p.is_field_parameter}
+    result = []
+    registered_parameters = [] if not parameter_registration else parameter_registration.scaling_info
+    for s in symbols:
+        if s in registered_parameters:
+            dtype = type_information[s].c_name
+            result.append("const uint_t level = block->getBlockStorage().getLevel(*block);")
+            result.append(f"{dtype} & {s} = {s}Vector[level];")
+        else:
+            result.append(f"auto & {s} = {prefix}{s}_;")
+    return "\n".join(result)
 
 
 @jinja2_context_decorator
@@ -235,7 +260,7 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st
                        that defines the inner region for the kernel to loop over. Parameter has to be left to default
                        if ghost_layers_to_include is specified.
         stream: optional name of gpu stream variable
-        spatial_shape_symbols: relevant only for gpu kernels - to determine CUDA block and grid sizes the iteration
+        spatial_shape_symbols: relevant only for gpu kernels - to determine GPU block and grid sizes the iteration
                                region (i.e. field shape) has to be known. This can normally be inferred by the kernel
                                parameters - however in special cases like boundary conditions a manual specification
                                may be necessary.
@@ -260,33 +285,38 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st
         required_ghost_layers = 0
     else:
         # ghost layer info is ((x_gl_front, x_gl_end), (y_gl_front, y_gl_end).. )
-        required_ghost_layers = max(max(kernel_ghost_layers))
+        if isinstance(kernel_ghost_layers, int):
+            required_ghost_layers = kernel_ghost_layers
+        else:
+            required_ghost_layers = max(max(kernel_ghost_layers))
 
     kernel_call_lines = []
 
+    def get_cell_interval(field_object):
+        if isinstance(cell_interval, str):
+            return cell_interval
+        elif isinstance(cell_interval, dict):
+            return cell_interval[field_object]
+        else:
+            return None
+
     def get_start_coordinates(field_object):
-        if cell_interval is None:
+        ci = get_cell_interval(field_object)
+        if ci is None:
             return [-ghost_layers_to_include - required_ghost_layers] * field_object.spatial_dimensions
         else:
             assert ghost_layers_to_include == 0
-            if field_object.spatial_dimensions == 3:
-                return [sp.Symbol("{ci}.{coord}Min()".format(coord=coord_name, ci=cell_interval)) - required_ghost_layers
-                        for coord_name in ('x', 'y', 'z')]
-            elif field_object.spatial_dimensions == 2:
-                return [sp.Symbol("{ci}.{coord}Min()".format(coord=coord_name, ci=cell_interval)) - required_ghost_layers
-                        for coord_name in ('x', 'y')]
-            else:
-                raise NotImplementedError(f"Only 2D and 3D fields are supported but a field with "
-                                          f"{field_object.spatial_dimensions} dimensions was passed")
+            return [sp.Symbol(f"{ci}.{coord_name}Min()") - required_ghost_layers for coord_name in ('x', 'y', 'z')]
 
     def get_end_coordinates(field_object):
-        if cell_interval is None:
+        ci = get_cell_interval(field_object)
+        if ci is None:
             shape_names = ['xSize()', 'ySize()', 'zSize()'][:field_object.spatial_dimensions]
             offset = 2 * ghost_layers_to_include + 2 * required_ghost_layers
-            return [f"cell_idx_c({field_object.name}->{e}) + {offset}" for e in shape_names]
+            return [f"int64_c({field_object.name}->{e}) + {offset}" for e in shape_names]
         else:
             assert ghost_layers_to_include == 0
-            return [f"cell_idx_c({cell_interval}.{coord_name}Size()) + {2 * required_ghost_layers}"
+            return [f"int64_c({ci}.{coord_name}Size()) + {2 * required_ghost_layers}"
                     for coord_name in ('x', 'y', 'z')]
 
     for param in ast_params:
@@ -347,6 +377,38 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st
     return "\n".join(kernel_call_lines)
 
 
+@jinja2_context_decorator
+def generate_function_collection_call(ctx, kernel_info, parameters_to_ignore=(), cell_interval=None, ghost_layers=None):
+    target = translate_target(ctx['target'])
+    is_gpu = target == Target.GPU
+
+    parameters = []
+    for param in kernel_info.parameters:
+        if param.is_field_pointer and param.field_name not in parameters_to_ignore:
+            parameters.append(param.field_name)
+
+    for param in kernel_info.parameters:
+        if not param.is_field_parameter and param.symbol.name not in parameters_to_ignore:
+            parameters.append(param.symbol.name)
+
+    # TODO due to backward compatibility with high level interface spec
+    for parameter in kernel_info.kernel_selection_tree.get_selection_parameter_list():
+        if parameter.name not in parameters_to_ignore:
+            parameters.append(parameter.name)
+
+    if cell_interval:
+        assert ghost_layers is None, "If a cell interval is specified ghost layers can not be specified"
+        parameters.append(cell_interval)
+
+    if ghost_layers:
+        parameters.append(ghost_layers)
+
+    if is_gpu and "gpuStream" not in parameters_to_ignore:
+        parameters.append("gpuStream")
+
+    return ", ".join(parameters)
+
+
 def generate_swaps(kernel_info):
     """Generates code to swap main fields with temporary fields"""
     swaps = ""
@@ -355,119 +417,229 @@ def generate_swaps(kernel_info):
     return swaps
 
 
-# TODO: basically 3 times the same code :(
-def generate_constructor_initializer_list(kernel_info, parameters_to_ignore=None):
-    if parameters_to_ignore is None:
-        parameters_to_ignore = []
+def generate_timestep_advancements(kernel_info, advance=True):
+    """Generates code to detect even or odd timestep"""
+    if kernel_info.field_timestep:
+        field_name = kernel_info.field_timestep["field_name"]
+        advancement_function = kernel_info.field_timestep["function"]
+        if advancement_function == "advanceTimestep" and advance is False:
+            advancement_function = "getTimestepPlusOne"
+        return f"uint8_t timestep = {field_name}->{advancement_function}();"
+    return ""
 
-    varying_parameter_names = []
-    if hasattr(kernel_info, 'varying_parameters'):
-        varying_parameter_names = tuple(e[1] for e in kernel_info.varying_parameters)
-    parameters_to_ignore += kernel_info.temporary_fields + varying_parameter_names
+
+def generate_constructor_initializer_list(kernel_infos, parameters_to_ignore=None, parameter_registration=None):
+    if not isinstance(kernel_infos, Iterable):
+        kernel_infos = [kernel_infos]
+
+    parameters_to_skip = []
+    if parameters_to_ignore is not None:
+        parameters_to_skip = [p for p in parameters_to_ignore]
+
+    for kernel_info in kernel_infos:
+        parameters_to_skip += kernel_info.temporary_fields
 
     parameter_initializer_list = []
     # First field pointer
-    for param in kernel_info.parameters:
-        if param.is_field_pointer and param.field_name not in parameters_to_ignore:
-            parameter_initializer_list.append(f"{param.field_name}ID({param.field_name}ID_)")
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if param.is_field_pointer and param.field_name not in parameters_to_skip:
+                parameter_initializer_list.append(f"{param.field_name}ID({param.field_name}ID_)")
+                parameters_to_skip.append(param.field_name)
 
     # Then free parameters
-    for param in kernel_info.parameters:
-        if not param.is_field_parameter and param.symbol.name not in parameters_to_ignore:
-            parameter_initializer_list.append(f"{param.symbol.name}_({param.symbol.name})")
+    if parameter_registration is not None:
+        parameters_to_skip.extend(parameter_registration.scaling_info)
+
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if not param.is_field_parameter and param.symbol.name not in parameters_to_skip:
+                parameter_initializer_list.append(f"{param.symbol.name}_({param.symbol.name})")
+                parameters_to_skip.append(param.symbol.name)
 
     return ", ".join(parameter_initializer_list)
 
 
-def generate_constructor_parameters(kernel_info, parameters_to_ignore=None):
-    if parameters_to_ignore is None:
-        parameters_to_ignore = []
+# TODO check varying_parameters
+def generate_constructor_parameters(kernel_infos, parameters_to_ignore=None):
+    if not isinstance(kernel_infos, Iterable):
+        kernel_infos = [kernel_infos]
+
+    parameters_to_skip = []
+    if parameters_to_ignore is not None:
+        parameters_to_skip = [p for p in parameters_to_ignore]
 
     varying_parameters = []
-    if hasattr(kernel_info, 'varying_parameters'):
-        varying_parameters = kernel_info.varying_parameters
-    varying_parameter_names = tuple(e[1] for e in varying_parameters)
-    parameters_to_ignore += kernel_info.temporary_fields + varying_parameter_names
+    for kernel_info in kernel_infos:
+        if hasattr(kernel_info, 'varying_parameters'):
+            varying_parameters = kernel_info.varying_parameters
+        varying_parameter_names = tuple(e[1] for e in varying_parameters)
+        parameters_to_skip += kernel_info.temporary_fields + varying_parameter_names
 
     parameter_list = []
     # First field pointer
-    for param in kernel_info.parameters:
-        if param.is_field_pointer and param.field_name not in parameters_to_ignore:
-            parameter_list.append(f"BlockDataID {param.field_name}ID_")
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if param.is_field_pointer and param.field_name not in parameters_to_skip:
+                parameter_list.append(f"BlockDataID {param.field_name}ID_")
+                parameters_to_skip.append(param.field_name)
 
     # Then free parameters
-    for param in kernel_info.parameters:
-        if not param.is_field_parameter and param.symbol.name not in parameters_to_ignore:
-            parameter_list.append(f"{param.symbol.dtype} {param.symbol.name}")
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if not param.is_field_parameter and param.symbol.name not in parameters_to_skip:
+                parameter_list.append(f"{param.symbol.dtype} {param.symbol.name}")
+                parameters_to_skip.append(param.symbol.name)
 
     varying_parameters = ["%s %s" % e for e in varying_parameters]
     return ", ".join(parameter_list + varying_parameters)
 
 
 def generate_constructor_call_arguments(kernel_info, parameters_to_ignore=None):
-    if parameters_to_ignore is None:
-        parameters_to_ignore = []
+    parameters_to_skip = []
+    if parameters_to_ignore is not None:
+        parameters_to_skip = [p for p in parameters_to_ignore]
 
     varying_parameters = []
     if hasattr(kernel_info, 'varying_parameters'):
         varying_parameters = kernel_info.varying_parameters
     varying_parameter_names = tuple(e[1] for e in varying_parameters)
-    parameters_to_ignore += kernel_info.temporary_fields + varying_parameter_names
+    parameters_to_skip += kernel_info.temporary_fields + varying_parameter_names
 
     parameter_list = []
     for param in kernel_info.parameters:
-        if param.is_field_pointer and param.field_name not in parameters_to_ignore:
+        if param.is_field_pointer and param.field_name not in parameters_to_skip:
             parameter_list.append(f"{param.field_name}ID")
-        elif not param.is_field_parameter and param.symbol.name not in parameters_to_ignore:
+        elif not param.is_field_parameter and param.symbol.name not in parameters_to_skip:
             parameter_list.append(f'{param.symbol.name}_')
     varying_parameters = [f"{e}_" for e in varying_parameter_names]
     return ", ".join(parameter_list + varying_parameters)
 
 
 @jinja2_context_decorator
-def generate_members(ctx, kernel_info, parameters_to_ignore=(), only_fields=False):
-    fields = {f.name: f for f in kernel_info.fields_accessed}
+def generate_members(ctx, kernel_infos, parameters_to_ignore=None, only_fields=False, parameter_registration=None):
+    if not isinstance(kernel_infos, Iterable):
+        kernel_infos = [kernel_infos]
+
+    if parameters_to_ignore is None:
+        parameters_to_ignore = []
+
+    params_to_skip = [p for p in parameters_to_ignore]
+
+    fields = dict()
+    for kernel_info in kernel_infos:
+        for field in kernel_info.fields_accessed:
+            fields[field.name] = field
+
+    varying_parameters = []
+    for kernel_info in kernel_infos:
+        if hasattr(kernel_info, 'varying_parameters'):
+            varying_parameters = kernel_info.varying_parameters
+        varying_parameter_names = tuple(e[1] for e in varying_parameters)
+        params_to_skip += kernel_info.temporary_fields
+        params_to_skip += varying_parameter_names
 
-    params_to_skip = tuple(parameters_to_ignore) + tuple(kernel_info.temporary_fields)
-    params_to_skip += tuple(e[1] for e in kernel_info.varying_parameters)
     target = translate_target(ctx['target'])
     is_gpu = target == Target.GPU
 
     result = []
-    for param in kernel_info.parameters:
-        if only_fields and not param.is_field_parameter:
-            continue
-        if param.is_field_pointer and param.field_name not in params_to_skip:
-            result.append(f"BlockDataID {param.field_name}ID;")
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if only_fields and not param.is_field_parameter:
+                continue
+            if param.is_field_pointer and param.field_name not in params_to_skip:
+                result.append(f"BlockDataID {param.field_name}ID;")
+                params_to_skip.append(param.field_name)
+
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if only_fields and not param.is_field_parameter:
+                continue
+            if not param.is_field_parameter and param.symbol.name not in params_to_skip:
+                if parameter_registration and param.symbol.name in parameter_registration.scaling_info:
+                    result.append(f"std::vector<{param.symbol.dtype}> {param.symbol.name}Vector;")
+                else:
+                    result.append(f"{param.symbol.dtype} {param.symbol.name}_;")
+                params_to_skip.append(param.symbol.name)
+
+    for kernel_info in kernel_infos:
+        for field_name in kernel_info.temporary_fields:
+            f = fields[field_name]
+            if field_name in parameters_to_ignore:
+                continue
+            parameters_to_ignore.append(field_name)
+            assert field_name.endswith('_tmp')
+            original_field_name = field_name[:-len('_tmp')]
+            f_size = get_field_fsize(f)
+            field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu)
+            result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name))
+
+    for kernel_info in kernel_infos:
+        if hasattr(kernel_info, 'varying_parameters'):
+            result.extend(["%s %s_;" % e for e in kernel_info.varying_parameters])
 
+    return "\n".join(result)
+
+
+@jinja2_context_decorator
+def generate_plain_parameter_list(ctx, kernel_info, cell_interval=None, ghost_layers=None, stream=None):
+    fields = {f.name: f for f in kernel_info.fields_accessed}
+    target = translate_target(ctx['target'])
+    is_gpu = target == Target.GPU
+
+    result = []
     for param in kernel_info.parameters:
-        if only_fields and not param.is_field_parameter:
+        if not param.is_field_parameter:
             continue
-        if not param.is_field_parameter and param.symbol.name not in params_to_skip:
-            result.append(f"{param.symbol.dtype} {param.symbol.name}_;")
+        if param.is_field_pointer and param.field_name:
+            f = fields[param.field_name]
+            f_size = get_field_fsize(f)
+            field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu)
+            result.append(f"{field_type} * {param.field_name}")
 
-    for field_name in kernel_info.temporary_fields:
-        f = fields[field_name]
-        if field_name in parameters_to_ignore:
-            continue
-        assert field_name.endswith('_tmp')
-        original_field_name = field_name[:-len('_tmp')]
-        f_size = get_field_fsize(f)
-        field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu)
-        result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name))
+    for param in kernel_info.parameters:
+        if not param.is_field_parameter and param.symbol.name:
+            result.append(f"{param.symbol.dtype} {param.symbol.name}")
 
     if hasattr(kernel_info, 'varying_parameters'):
         result.extend(["%s %s_;" % e for e in kernel_info.varying_parameters])
 
-    return "\n".join(result)
+    # TODO due to backward compatibility with high level interface spec
+    for parameter in kernel_info.kernel_selection_tree.get_selection_parameter_list():
+        result.append(f"{parameter.dtype} {parameter.name}")
+
+    if cell_interval:
+        result.append(f"const CellInterval & {cell_interval}")
+
+    if ghost_layers is not None:
+        if type(ghost_layers) in (int, ):
+            result.append(f"const cell_idx_t ghost_layers = {ghost_layers}")
+        else:
+            result.append("const cell_idx_t ghost_layers")
+
+    if is_gpu:
+        if stream is not None:
+            result.append(f"gpuStream_t stream = {stream}")
+        else:
+            result.append("gpuStream_t stream")
+
+    return ", ".join(result)
 
 
-def generate_destructor(kernel_info, class_name):
-    if not kernel_info.temporary_fields:
+def generate_destructor(kernel_infos, class_name):
+    temporary_fields = []
+    if not isinstance(kernel_infos, Iterable):
+        kernel_infos = [kernel_infos]
+    for kernel_info in kernel_infos:
+        for tmp_field in kernel_info.temporary_fields:
+            if tmp_field not in temporary_fields:
+                temporary_fields.append(tmp_field)
+
+    if not temporary_fields:
         return ""
     else:
         contents = ""
-        for field_name in kernel_info.temporary_fields:
+        for field_name in temporary_fields:
             contents += delete_loop.format(original_field_name=field_name[:-len('_tmp')])
         return temporary_constructor.format(contents=contents, class_name=class_name)
 
@@ -502,6 +674,47 @@ def nested_class_method_definition_prefix(ctx, nested_class_name):
         return f"{outer_class}::{nested_class_name}"
 
 
+@jinja2_context_decorator
+def generate_parameter_registration(ctx, kernel_infos, parameter_registration):
+    if parameter_registration is None:
+        return ""
+    if not isinstance(kernel_infos, Iterable):
+        kernel_infos = [kernel_infos]
+
+    params_to_skip = []
+    result = []
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if not param.is_field_parameter and param.symbol.name not in params_to_skip:
+                if param.symbol.name in parameter_registration.scaling_info:
+                    result.append(standard_parameter_registration.format(dtype=param.symbol.dtype,
+                                                                         name=param.symbol.name))
+                    params_to_skip.append(param.symbol.name)
+
+    return "\n".join(result)
+
+
+@jinja2_context_decorator
+def generate_constructor(ctx, kernel_infos, parameter_registration):
+    if parameter_registration is None:
+        return ""
+    if not isinstance(kernel_infos, Iterable):
+        kernel_infos = [kernel_infos]
+
+    params_to_skip = []
+    result = []
+    for kernel_info in kernel_infos:
+        for param in kernel_info.parameters:
+            if not param.is_field_parameter and param.symbol.name not in params_to_skip:
+                if param.symbol.name in parameter_registration.scaling_info:
+                    name = param.symbol.name
+                    dtype = param.symbol.dtype
+                    result.append(standard_parameter_registration.format(dtype=dtype, name=name))
+                    params_to_skip.append(name)
+
+    return "\n".join(result)
+
+
 def generate_list_of_expressions(expressions, prepend=''):
     if len(expressions) == 0:
         return ''
@@ -518,7 +731,7 @@ def type_identifier_list(nested_arg_list):
 
     def recursive_flatten(arg_list):
         for s in arg_list:
-            if isinstance(s, str):
+            if isinstance(s, str) and len(s) > 0:
                 result.append(s)
             elif isinstance(s, TypedSymbol):
                 result.append(f"{s.dtype} {s.name}")
@@ -555,16 +768,22 @@ def add_pystencils_filters_to_jinja_env(jinja_env):
     jinja_env.filters['generate_definitions'] = generate_definitions
     jinja_env.filters['generate_declarations'] = generate_declarations
     jinja_env.filters['generate_members'] = generate_members
+    jinja_env.filters['generate_plain_parameter_list'] = generate_plain_parameter_list
     jinja_env.filters['generate_constructor_parameters'] = generate_constructor_parameters
     jinja_env.filters['generate_constructor_initializer_list'] = generate_constructor_initializer_list
     jinja_env.filters['generate_constructor_call_arguments'] = generate_constructor_call_arguments
     jinja_env.filters['generate_call'] = generate_call
+    jinja_env.filters['generate_function_collection_call'] = generate_function_collection_call
     jinja_env.filters['generate_block_data_to_field_extraction'] = generate_block_data_to_field_extraction
+    jinja_env.filters['generate_timestep_advancements'] = generate_timestep_advancements
     jinja_env.filters['generate_swaps'] = generate_swaps
     jinja_env.filters['generate_refs_for_kernel_parameters'] = generate_refs_for_kernel_parameters
     jinja_env.filters['generate_destructor'] = generate_destructor
     jinja_env.filters['generate_field_type'] = generate_field_type
     jinja_env.filters['nested_class_method_definition_prefix'] = nested_class_method_definition_prefix
+    jinja_env.filters['generate_parameter_registration'] = generate_parameter_registration
+    jinja_env.filters['generate_constructor'] = generate_constructor
     jinja_env.filters['type_identifier_list'] = type_identifier_list
     jinja_env.filters['identifier_list'] = identifier_list
     jinja_env.filters['list_of_expressions'] = generate_list_of_expressions
+    jinja_env.filters['field_type'] = field_type
diff --git a/python/pystencils_walberla/kernel_info.py b/python/pystencils_walberla/kernel_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..1382d94f4220495da28bf02113636fdf8addbaf1
--- /dev/null
+++ b/python/pystencils_walberla/kernel_info.py
@@ -0,0 +1,67 @@
+from functools import reduce
+
+from pystencils import Target
+
+from pystencils.backends.cbackend import get_headers
+from pystencils.backends.cuda_backend import CudaSympyPrinter
+from pystencils.typing.typed_sympy import SHAPE_DTYPE
+from pystencils.typing import TypedSymbol
+
+from pystencils_walberla.utility import merge_sorted_lists
+
+
+# TODO KernelInfo and KernelFamily should have same interface
+class KernelInfo:
+    def __init__(self, ast, temporary_fields=(), field_swaps=(), varying_parameters=()):
+        self.ast = ast
+        self.temporary_fields = tuple(temporary_fields)
+        self.field_swaps = tuple(field_swaps)
+        self.varying_parameters = tuple(varying_parameters)
+        self.parameters = ast.get_parameters()  # cache parameters here
+
+    @property
+    def fields_accessed(self):
+        return self.ast.fields_accessed
+
+    def get_ast_attr(self, name):
+        """Returns the value of an attribute of the AST managed by this KernelInfo.
+        For compatibility with KernelFamily."""
+        return self.ast.__getattribute__(name)
+
+    def get_headers(self):
+        all_headers = [list(get_headers(self.ast))]
+        return reduce(merge_sorted_lists, all_headers)
+
+    def generate_kernel_invocation_code(self, **kwargs):
+        ast = self.ast
+        ast_params = self.parameters
+        is_cpu = self.ast.target == Target.CPU
+        call_parameters = ", ".join([p.symbol.name for p in ast_params])
+
+        if not is_cpu:
+            stream = kwargs.get('stream', '0')
+            spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ())
+
+            if not spatial_shape_symbols:
+                spatial_shape_symbols = [p.symbol for p in ast_params if p.is_field_shape]
+                spatial_shape_symbols.sort(key=lambda e: e.coordinate)
+            else:
+                spatial_shape_symbols = [TypedSymbol(s, SHAPE_DTYPE) for s in spatial_shape_symbols]
+
+            assert spatial_shape_symbols, "No shape parameters in kernel function arguments.\n"\
+                "Please only use kernels for generic field sizes!"
+
+            indexing_dict = ast.indexing.call_parameters(spatial_shape_symbols)
+            sp_printer_c = CudaSympyPrinter()
+            kernel_call_lines = [
+                "dim3 _block(int(%s), int(%s), int(%s));" % tuple(sp_printer_c.doprint(e)
+                                                                  for e in indexing_dict['block']),
+                "dim3 _grid(int(%s), int(%s), int(%s));" % tuple(sp_printer_c.doprint(e)
+                                                                 for e in indexing_dict['grid']),
+                "internal_%s::%s<<<_grid, _block, 0, %s>>>(%s);" % (ast.function_name, ast.function_name,
+                                                                    stream, call_parameters),
+            ]
+
+            return "\n".join(kernel_call_lines)
+        else:
+            return f"internal_{ast.function_name}::{ast.function_name}({call_parameters});"
diff --git a/python/pystencils_walberla/kernel_selection.py b/python/pystencils_walberla/kernel_selection.py
index c62f441775edc763f4ad41b2e9c218a5b86930d8..c946f85105185159e317ea18d4740667cd7761c7 100644
--- a/python/pystencils_walberla/kernel_selection.py
+++ b/python/pystencils_walberla/kernel_selection.py
@@ -8,6 +8,8 @@ from pystencils.backends.cbackend import get_headers
 from pystencils.backends.cuda_backend import CudaSympyPrinter
 from pystencils.typing.typed_sympy import SHAPE_DTYPE
 
+from pystencils_walberla.utility import merge_lists_of_symbols, merge_sorted_lists
+
 
 """
 
@@ -120,6 +122,41 @@ class AbstractConditionNode(AbstractKernelSelectionNode, ABC):
         return code
 
 
+class SwitchNode(AbstractKernelSelectionNode):
+    def __init__(self, parameter_symbol, cases_dict):
+        self.cases_dict = cases_dict
+        self.parameter_symbol = parameter_symbol
+
+    @property
+    def selection_parameters(self):
+        return {self.parameter_symbol}
+
+    def collect_kernel_calls(self):
+        return reduce(lambda x, y: x | y.collect_kernel_calls(), self.cases_dict.values(), set())
+
+    def collect_selection_parameters(self):
+        return reduce(lambda x, y: x | y.collect_selection_parameters(),
+                      self.cases_dict.values(),
+                      self.selection_parameters)
+
+    def get_code(self, **kwargs):
+        def case_code(case, subtree):
+            code = f"case {case} : {{\n"
+            code += do_indent(subtree.get_code(**kwargs), width=4, first=True)
+            code += "\n    break;\n}"
+            return code
+
+        cases = [case_code(k, v) for k, v in self.cases_dict.items()]
+        switch_code = f"switch ({self.parameter_symbol.name}) {{\n"
+
+        switch_body = '\n'.join(cases)
+        switch_body = do_indent(switch_body, width=4, first=True)
+
+        switch_code += switch_body
+        switch_code += "default: break; \n}"
+        return switch_code
+
+
 class KernelCallNode(AbstractKernelSelectionNode):
     def __init__(self, ast):
         self.ast = ast
@@ -192,22 +229,29 @@ class SimpleBooleanCondition(AbstractConditionNode):
 class KernelFamily:
     def __init__(self, kernel_selection_tree: AbstractKernelSelectionNode,
                  class_name: str,
-                 temporary_fields=(), field_swaps=(), varying_parameters=()):
+                 temporary_fields=(), field_swaps=(), varying_parameters=(),
+                 field_timestep=None):
         self.kernel_selection_tree = kernel_selection_tree
         self.kernel_selection_parameters = kernel_selection_tree.get_selection_parameter_list()
         self.temporary_fields = tuple(temporary_fields)
         self.field_swaps = tuple(field_swaps)
+        self.field_timestep = field_timestep
         self.varying_parameters = tuple(varying_parameters)
 
         all_kernel_calls = self.kernel_selection_tree.collect_kernel_calls()
         all_param_lists = [k.parameters for k in all_kernel_calls]
         asts_list = [k.ast for k in all_kernel_calls]
         self.representative_ast = asts_list[0]
+        self.target = self.representative_ast.target
 
         #   Eliminate duplicates
         self.all_asts = set(asts_list)
 
-        #   Check function names for uniqueness and reformat them
+        # TODO due to backward compatibility with high level interface spec
+        if self.field_timestep is not None:
+            self.kernel_selection_parameters = []
+
+    #   Check function names for uniqueness and reformat them
         #   using the class name
         function_names = [ast.function_name.lower() for ast in self.all_asts]
         unique_names = set(function_names)
@@ -258,7 +302,7 @@ class AbstractInterfaceArgumentMapping:
         raise NotImplementedError()
 
     @property
-    def headers(self):
+    def headers(self) -> Set:
         return set()
 
 
@@ -312,34 +356,4 @@ class HighLevelInterfaceSpec:
 # ---------------------------------- Helpers --------------------------------------------------------------------------
 
 
-def merge_sorted_lists(lx, ly, sort_key=lambda x: x, identity_check_key=None):
-    if identity_check_key is None:
-        identity_check_key = sort_key
-    nx = len(lx)
-    ny = len(ly)
-
-    def recursive_merge(lx_intern, ly_intern, ix_intern, iy_intern):
-        if ix_intern == nx:
-            return ly_intern[iy_intern:]
-        if iy_intern == ny:
-            return lx_intern[ix_intern:]
-        x = lx_intern[ix_intern]
-        y = ly_intern[iy_intern]
-        skx = sort_key(x)
-        sky = sort_key(y)
-        if skx == sky:
-            if identity_check_key(x) == identity_check_key(y):
-                return [x] + recursive_merge(lx_intern, ly_intern, ix_intern + 1, iy_intern + 1)
-            else:
-                raise ValueError(f'Elements <{x}> and <{y}> with equal sort key where not identical!')
-        elif skx < sky:
-            return [x] + recursive_merge(lx_intern, ly_intern, ix_intern + 1, iy_intern)
-        else:
-            return [y] + recursive_merge(lx_intern, ly_intern, ix_intern, iy_intern + 1)
-    return recursive_merge(lx, ly, 0, 0)
-
 
-def merge_lists_of_symbols(lists):
-    def merger(lx, ly):
-        return merge_sorted_lists(lx, ly, sort_key=lambda x: x.symbol.name, identity_check_key=lambda x: x.symbol)
-    return reduce(merger, lists)
diff --git a/python/pystencils_walberla/pack_info.py b/python/pystencils_walberla/pack_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..221a946e004143f0f02c3a2663df6726add4027f
--- /dev/null
+++ b/python/pystencils_walberla/pack_info.py
@@ -0,0 +1,288 @@
+from collections import OrderedDict, defaultdict
+from dataclasses import replace
+from itertools import product
+from typing import Dict, Optional, Sequence, Tuple
+
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
+from pystencils import Assignment, AssignmentCollection, Field, FieldType, Target, create_kernel
+from pystencils.backends.cbackend import get_headers
+from pystencils.stencil import inverse_direction, offset_to_direction_string
+
+from pystencils_walberla.cmake_integration import CodeGenerationContext
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from pystencils_walberla.kernel_info import KernelInfo
+from pystencils_walberla.utility import config_from_context
+
+
+def generate_pack_info_for_field(ctx: CodeGenerationContext, class_name: str, field: Field,
+                                 direction_subset: Optional[Tuple[Tuple[int, int, int]]] = None,
+                                 operator=None, gl_to_inner=False,
+                                 target=Target.CPU, data_type=None, cpu_openmp=False,
+                                 **create_kernel_params):
+    """Creates a pack info for a pystencils field assuming a pull-type stencil, packing all cell elements.
+
+    Args:
+        ctx: see documentation of `generate_sweep`
+        class_name: name of the generated class
+        field: pystencils field for which to generate pack info
+        direction_subset: optional sequence of directions for which values should be packed
+                          otherwise a D3Q27 stencil is assumed
+        operator: optional operator for, e.g., reduction pack infos
+        gl_to_inner: communicates values from ghost layers of sender to interior of receiver
+        target: An pystencils Target to define cpu or gpu code generation. See pystencils.Target
+        data_type: default datatype for the kernel creation. Default is double
+        cpu_openmp: if loops should use openMP or not.
+        **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
+    """
+
+    if not direction_subset:
+        direction_subset = tuple((i, j, k) for i, j, k in product(*[(-1, 0, 1)] * 3))
+
+    all_index_accesses = [field(*ind) for ind in product(*[range(s) for s in field.index_shape])]
+    return generate_pack_info(ctx, class_name, {direction_subset: all_index_accesses}, operator=operator,
+                              gl_to_inner=gl_to_inner, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                              **create_kernel_params)
+
+
+def generate_pack_info_from_kernel(ctx: CodeGenerationContext, class_name: str, assignments: Sequence[Assignment],
+                                   kind='pull', operator=None, target=Target.CPU, data_type=None, cpu_openmp=False,
+                                   **create_kernel_params):
+    """Generates a waLBerla GPU PackInfo from a (pull) kernel.
+
+    Args:
+        ctx: see documentation of `generate_sweep`
+        class_name: name of the generated class
+        assignments: list of assignments from the compute kernel - generates PackInfo for "pull" part only
+                     i.e. the kernel is expected to only write to the center
+        kind: can either be pull or push
+        operator: optional operator for, e.g., reduction pack infos
+        target: An pystencils Target to define cpu or gpu code generation. See pystencils.Target
+        data_type: default datatype for the kernel creation. Default is double
+        cpu_openmp: if loops should use openMP or not.
+        **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
+    """
+    assert kind in ('push', 'pull')
+    reads = set()
+    writes = set()
+
+    if isinstance(assignments, AssignmentCollection):
+        assignments = assignments.all_assignments
+
+    for a in assignments:
+        if not isinstance(a, Assignment):
+            continue
+        reads.update(a.rhs.atoms(Field.Access))
+        writes.update(a.lhs.atoms(Field.Access))
+    spec = defaultdict(set)
+    if kind == 'pull':
+        for fa in reads:
+            assert all(abs(e) <= 1 for e in fa.offsets)
+            if all(offset == 0 for offset in fa.offsets):
+                continue
+            comm_direction = inverse_direction(fa.offsets)
+            for comm_dir in _comm_directions(comm_direction):
+                spec[(comm_dir,)].add(fa.field.center(*fa.index))
+    elif kind == 'push':
+        for fa in writes:
+            assert all(abs(e) <= 1 for e in fa.offsets)
+            if all(offset == 0 for offset in fa.offsets):
+                continue
+            for comm_dir in _comm_directions(fa.offsets):
+                spec[(comm_dir,)].add(fa)
+    else:
+        raise ValueError("Invalid 'kind' parameter")
+    return generate_pack_info(ctx, class_name, spec, operator=operator,
+                              target=target, data_type=data_type, cpu_openmp=cpu_openmp, **create_kernel_params)
+
+
+def generate_pack_info(ctx: CodeGenerationContext, class_name: str,
+                       directions_to_pack_terms: Dict[Tuple[Tuple], Sequence[Field.Access]],
+                       namespace='pystencils', operator=None, gl_to_inner=False,
+                       target=Target.CPU, data_type=None, cpu_openmp=False,
+                       **create_kernel_params):
+    """Generates a waLBerla GPU PackInfo
+
+    Args:
+        ctx: see documentation of `generate_sweep`
+        class_name: name of the generated class
+        directions_to_pack_terms: maps tuples of directions to read field accesses, specifying which values have to be
+                                  packed for which direction
+        namespace: inner namespace of the generated class
+        operator: optional operator for, e.g., reduction pack infos
+        gl_to_inner: communicates values from ghost layers of sender to interior of receiver
+        target: An pystencils Target to define cpu or gpu code generation. See pystencils.Target
+        data_type: default datatype for the kernel creation. Default is double
+        cpu_openmp: if loops should use openMP or not.
+        **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
+    """
+    if cpu_openmp:
+        raise ValueError("The packing kernels are already called inside an OpenMP parallel region. Thus "
+                         "additionally parallelising each kernel is not supported.")
+    items = [(e[0], sorted(e[1], key=lambda x: str(x))) for e in directions_to_pack_terms.items()]
+    items = sorted(items, key=lambda e: e[0])
+    directions_to_pack_terms = OrderedDict(items)
+
+    config = config_from_context(ctx, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                                 **create_kernel_params)
+
+    config_zero_gl = config_from_context(ctx, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                                         ghost_layers=0, **create_kernel_params)
+
+    # Vectorisation of the pack info is not implemented.
+    config = replace(config, cpu_vectorize_info=None)
+    config_zero_gl = replace(config_zero_gl, cpu_vectorize_info=None)
+
+    config = replace(config, allow_double_writes=True)
+    config_zero_gl = replace(config_zero_gl, allow_double_writes=True)
+
+    template_name = "CpuPackInfo.tmpl" if config.target == Target.CPU else 'GpuPackInfo.tmpl'
+
+    fields_accessed = set()
+    for terms in directions_to_pack_terms.values():
+        for term in terms:
+            assert isinstance(term, Field.Access)  # and all(e == 0 for e in term.offsets)
+            fields_accessed.add(term)
+
+    field_names = {fa.field.name for fa in fields_accessed}
+
+    data_types = {fa.field.dtype for fa in fields_accessed}
+    if len(data_types) == 0:
+        raise ValueError("No fields to pack!")
+    if len(data_types) != 1:
+        err_detail = "\n".join(f" - {f.name} [{f.dtype}]" for f in fields_accessed)
+        raise NotImplementedError("Fields of different data types are used - this is not supported.\n" + err_detail)
+    dtype = data_types.pop()
+
+    pack_kernels = OrderedDict()
+    unpack_kernels = OrderedDict()
+    all_accesses = set()
+    elements_per_cell = OrderedDict()
+    for direction_set, terms in directions_to_pack_terms.items():
+        for d in direction_set:
+            if not all(abs(i) <= 1 for i in d):
+                raise NotImplementedError("Only first neighborhood supported")
+
+        buffer = Field.create_generic('buffer', spatial_dimensions=1, field_type=FieldType.BUFFER,
+                                      dtype=dtype.numpy_dtype, index_shape=(len(terms),))
+
+        direction_strings = tuple(offset_to_direction_string(d) for d in direction_set)
+        all_accesses.update(terms)
+
+        pack_assignments = [Assignment(buffer(i), term) for i, term in enumerate(terms)]
+        pack_ast = create_kernel(pack_assignments, config=config_zero_gl)
+        pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings))
+        if operator is None:
+            unpack_assignments = [Assignment(term, buffer(i)) for i, term in enumerate(terms)]
+        else:
+            unpack_assignments = [Assignment(term, operator(term, buffer(i))) for i, term in enumerate(terms)]
+        unpack_ast = create_kernel(unpack_assignments, config=config_zero_gl)
+        unpack_ast.function_name = 'unpack_{}'.format("_".join(direction_strings))
+
+        pack_kernels[direction_strings] = KernelInfo(pack_ast)
+        unpack_kernels[direction_strings] = KernelInfo(unpack_ast)
+        elements_per_cell[direction_strings] = len(terms)
+    fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], config=config)
+
+    jinja_context = {
+        'class_name': class_name,
+        'pack_kernels': pack_kernels,
+        'unpack_kernels': unpack_kernels,
+        'fused_kernel': KernelInfo(fused_kernel),
+        'elements_per_cell': elements_per_cell,
+        'headers': get_headers(fused_kernel),
+        'target': config.target.name.lower(),
+        'dtype': dtype,
+        'field_name': field_names.pop(),
+        'namespace': namespace,
+        'gl_to_inner': gl_to_inner,
+    }
+    env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+    header = env.get_template(template_name + ".h").render(**jinja_context)
+    source = env.get_template(template_name + ".cpp").render(**jinja_context)
+
+    source_extension = "cpp" if config.target == Target.CPU else "cu"
+    ctx.write_file(f"{class_name}.h", header)
+    ctx.write_file(f"{class_name}.{source_extension}", source)
+
+
+def generate_mpidtype_info_from_kernel(ctx: CodeGenerationContext, class_name: str,
+                                       assignments: Sequence[Assignment], kind='pull', namespace='pystencils'):
+    assert kind in ('push', 'pull')
+    reads = set()
+    writes = set()
+
+    if isinstance(assignments, AssignmentCollection):
+        assignments = assignments.all_assignments
+
+    for a in assignments:
+        if not isinstance(a, Assignment):
+            continue
+        reads.update(a.rhs.atoms(Field.Access))
+        writes.update(a.lhs.atoms(Field.Access))
+
+    spec = defaultdict(set)
+    if kind == 'pull':
+        read_fields = set(fa.field for fa in reads)
+        assert len(read_fields) == 1, "Only scenarios where one fields neighbors are accessed"
+        field = read_fields.pop()
+        for fa in reads:
+            assert all(abs(e) <= 1 for e in fa.offsets)
+            if all(offset == 0 for offset in fa.offsets):
+                continue
+            comm_direction = inverse_direction(fa.offsets)
+            for comm_dir in _comm_directions(comm_direction):
+                assert len(fa.index) == 1, "Supports only fields with a single index dimension"
+                spec[(offset_to_direction_string(comm_dir),)].add(fa.index[0])
+    elif kind == 'push':
+        written_fields = set(fa.field for fa in writes)
+        assert len(written_fields) == 1, "Only scenarios where one fields neighbors are accessed"
+        field = written_fields.pop()
+
+        for fa in writes:
+            assert all(abs(e) <= 1 for e in fa.offsets)
+            if all(offset == 0 for offset in fa.offsets):
+                continue
+            for comm_dir in _comm_directions(fa.offsets):
+                assert len(fa.index) == 1, "Supports only fields with a single index dimension"
+                spec[(offset_to_direction_string(comm_dir),)].add(fa.index[0])
+    else:
+        raise ValueError("Invalid 'kind' parameter")
+
+    jinja_context = {
+        'class_name': class_name,
+        'namespace': namespace,
+        'kind': kind,
+        'field_name': field.name,
+        'f_size': field.index_shape[0],
+        'spec': spec,
+    }
+    env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
+    header = env.get_template("MpiDtypeInfo.tmpl.h").render(**jinja_context)
+    ctx.write_file(f"{class_name}.h", header)
+
+
+# ---------------------------------- Internal --------------------------------------------------------------------------
+
+def _comm_directions(direction):
+    if all(e == 0 for e in direction):
+        yield direction
+    binary_numbers_list = binary_numbers(len(direction))
+    for comm_direction in binary_numbers_list:
+        for i in range(len(direction)):
+            if direction[i] == 0:
+                comm_direction[i] = 0
+            if direction[i] == -1 and comm_direction[i] == 1:
+                comm_direction[i] = -1
+        if not all(e == 0 for e in comm_direction):
+            yield tuple(comm_direction)
+
+
+def binary_numbers(n):
+    result = list()
+    for i in range(1 << n):
+        binary_number = bin(i)[2:]
+        binary_number = '0' * (n - len(binary_number)) + binary_number
+        result.append((list(map(int, binary_number))))
+    return result
diff --git a/python/pystencils_walberla/sweep.py b/python/pystencils_walberla/sweep.py
new file mode 100644
index 0000000000000000000000000000000000000000..ddf9a2a52b0de504394becdf99127a06f866383d
--- /dev/null
+++ b/python/pystencils_walberla/sweep.py
@@ -0,0 +1,199 @@
+from typing import Callable, Sequence
+
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
+from pystencils import Target, Assignment
+from pystencils import Field, create_kernel, create_staggered_kernel
+from pystencils.astnodes import KernelFunction
+
+from pystencils_walberla.cmake_integration import CodeGenerationContext
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily, HighLevelInterfaceSpec
+from pystencils_walberla.utility import config_from_context
+
+
+def generate_sweep(ctx: CodeGenerationContext, class_name: str, assignments: Sequence[Assignment],
+                   namespace: str = 'pystencils', field_swaps=(), staggered=False, varying_parameters=(),
+                   inner_outer_split=False, ghost_layers_to_include=0,
+                   target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None, max_threads=None,
+                   **create_kernel_params):
+    """Generates a waLBerla sweep from a pystencils representation.
+
+    The constructor of the C++ sweep class expects all kernel parameters (fields and parameters) in alphabetical order.
+        Fields have to be passed using BlockDataID's pointing to walberla fields
+
+    Args:
+        ctx: build system context filled with information from waLBerla's CMake. The context for example
+                            defines where to write generated files, if OpenMP is available or which SIMD instruction
+                            set should be used. See waLBerla examples on how to get a context.
+        class_name: name of the generated sweep class
+        assignments: list of assignments defining the stencil update rule or a :class:`KernelFunction`
+        namespace: the generated class is accessible as walberla::<namespace>::<class_name>
+        field_swaps: sequence of field pairs (field, temporary_field). The generated sweep only gets the first field
+                     as argument, creating a temporary field internally which is swapped with the first field after
+                     each iteration.
+        staggered: set to True to create staggered kernels with `pystencils.create_staggered_kernel`
+        varying_parameters: Depending on the configuration, the generated kernels may receive different arguments for
+                            different setups. To not have to adapt the C++ application when the parameters change,
+                            the varying_parameters sequence can contain parameter names, which are always expected by
+                            the C++ class constructor even if the kernel does not need them.
+        inner_outer_split: if True generate a sweep that supports separate iteration over inner and outer regions
+                           to allow for communication hiding.
+        ghost_layers_to_include: determines how many ghost layers should be included for the Sweep.
+                                 This is relevant if a setter kernel should also set correct values to the ghost layers.
+        target: A pystencils Target to define cpu or gpu code generation. See pystencils.Target
+        data_type: default datatype for the kernel creation. Default is double
+        cpu_openmp: if loops should use openMP or not.
+        cpu_vectorize_info: dictionary containing necessary information for the usage of a SIMD instruction set.
+        max_threads: only relevant for GPU kernels. Will be argument of `__launch_bounds__`
+        **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
+    """
+    if staggered:
+        assert 'omp_single_loop' not in create_kernel_params
+        create_kernel_params['omp_single_loop'] = False
+    config = config_from_context(ctx, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
+                                 cpu_vectorize_info=cpu_vectorize_info, **create_kernel_params)
+
+    if isinstance(assignments, KernelFunction):
+        ast = assignments
+        target = ast.target
+    elif not staggered:
+        ast = create_kernel(assignments, config=config)
+    else:
+        # This should not be necessary but create_staggered_kernel does not take a config at the moment ...
+        ast = create_staggered_kernel(assignments, **config.__dict__)
+
+    ast.function_name = class_name.lower()
+
+    selection_tree = KernelCallNode(ast)
+    generate_selective_sweep(ctx, class_name, selection_tree, target=target, namespace=namespace,
+                             field_swaps=field_swaps, varying_parameters=varying_parameters,
+                             inner_outer_split=inner_outer_split, ghost_layers_to_include=ghost_layers_to_include,
+                             cpu_vectorize_info=config.cpu_vectorize_info,
+                             cpu_openmp=config.cpu_openmp, max_threads=max_threads)
+
+
+def generate_selective_sweep(ctx, class_name, selection_tree, interface_mappings=(), target=None,
+                             namespace='pystencils', field_swaps=(), varying_parameters=(),
+                             inner_outer_split=False, ghost_layers_to_include=0,
+                             cpu_vectorize_info=None, cpu_openmp=False, max_threads=None):
+    """Generates a selective sweep from a kernel selection tree. A kernel selection tree consolidates multiple
+    pystencils ASTs in a tree-like structure. See also module `pystencils_walberla.kernel_selection`.
+
+    Args:
+        ctx: see documentation of `generate_sweep`
+        class_name: name of the generated sweep class
+        selection_tree: Instance of `AbstractKernelSelectionNode`, root of the selection tree
+        interface_mappings: sequence of `AbstractInterfaceArgumentMapping` instances for selection arguments of
+                            the selection tree
+        target: `None`, `Target.CPU` or `Target.GPU`; inferred from kernels if `None` is given.
+        namespace: see documentation of `generate_sweep`
+        field_swaps: see documentation of `generate_sweep`
+        varying_parameters: see documentation of `generate_sweep`
+        inner_outer_split: see documentation of `generate_sweep`
+        ghost_layers_to_include: see documentation of `generate_sweep`
+        cpu_vectorize_info: Dictionary containing information about CPU vectorization applied to the kernels
+        cpu_openmp: Whether or not CPU kernels use OpenMP parallelization
+        max_threads: only relevant for GPU kernels. Will be argument of `__launch_bounds__`
+    """
+    def to_name(f):
+        return f.name if isinstance(f, Field) else f
+
+    field_swaps = tuple((to_name(e[0]), to_name(e[1])) for e in field_swaps)
+    temporary_fields = tuple(e[1] for e in field_swaps)
+
+    kernel_family = KernelFamily(selection_tree, class_name,
+                                 temporary_fields, field_swaps, varying_parameters)
+
+    if target is None:
+        target = kernel_family.get_ast_attr('target')
+    elif target != kernel_family.get_ast_attr('target'):
+        raise ValueError('Mismatch between target parameter and AST targets.')
+
+    if not ctx.gpu and target == Target.GPU:
+        return
+
+    representative_field = {p.field_name for p in kernel_family.parameters if p.is_field_parameter}
+    representative_field = sorted(representative_field)[0]
+
+    env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+
+    interface_spec = HighLevelInterfaceSpec(kernel_family.kernel_selection_parameters, interface_mappings)
+
+    jinja_context = {
+        'kernel': kernel_family,
+        'namespace': namespace,
+        'class_name': class_name,
+        'target': target.name.lower(),
+        'field': representative_field,
+        'ghost_layers_to_include': ghost_layers_to_include,
+        'inner_outer_split': inner_outer_split,
+        'interface_spec': interface_spec,
+        'generate_functor': True,
+        'cpu_vectorize_info': cpu_vectorize_info,
+        'cpu_openmp': cpu_openmp,
+        'max_threads': max_threads
+    }
+    header = env.get_template("Sweep.tmpl.h").render(**jinja_context)
+    source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context)
+
+    source_extension = "cpp" if target == Target.CPU else "cu"
+    ctx.write_file(f"{class_name}.h", header)
+    ctx.write_file(f"{class_name}.{source_extension}", source)
+
+
+def generate_sweep_collection(ctx, class_name: str, function_generators: Sequence[Callable], parameter_scaling=None):
+    """Generates a sweep collection
+    """
+
+    contexts_function_generators = list()
+    for fct in function_generators:
+        contexts_function_generators.append(fct())
+
+    namespaces = set([context['namespace'] for context in contexts_function_generators])
+    assert len(namespaces) == 1, "All function_generators must output the same namespace!"
+    namespace = namespaces.pop()
+
+    headers = set()
+    for context in contexts_function_generators:
+        for header in context['interface_spec'].headers:
+            headers.add(header)
+        for header in context['kernel'].get_headers():
+            headers.add(header)
+
+    kernel_list = list()
+    for context in contexts_function_generators:
+        kernel_list.append(context['kernel'])
+
+    kernels = list()
+    for context in contexts_function_generators:
+        kernels.append({
+            'kernel': context['kernel'],
+            'function_name': context['function_name'],
+            'ghost_layers_to_include': 'ghost_layers',
+            'field': context['field'],
+            'max_threads': context['max_threads']
+        })
+
+    target = kernels[0]['kernel'].target
+
+    jinja_context = {
+        'kernel_list': kernel_list,
+        'kernels': kernels,
+        'namespace': namespace,
+        'class_name': class_name,
+        'headers': headers,
+        'target': target.name.lower(),
+        'parameter_scaling': parameter_scaling,
+    }
+
+    env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
+    add_pystencils_filters_to_jinja_env(env)
+
+    header = env.get_template("SweepCollection.tmpl.h").render(**jinja_context)
+    source = env.get_template("SweepCollection.tmpl.cpp").render(**jinja_context)
+
+    source_extension = "cpp" if target == Target.CPU else "cu"
+    ctx.write_file(f"{class_name}.h", header)
+    ctx.write_file(f"{class_name}.{source_extension}", source)
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.cpp b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
index b4fe6df4794d61f06c2a1a900879871e7ac9f14d..644202ba67cd574724e46ef2b42e60535dc2e5c6 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
@@ -17,8 +17,6 @@
 //! \\author pystencils
 //======================================================================================================================
 
-#include <cmath>
-
 #include "core/DataTypes.h"
 #include "core/Macros.h"
 #include "{{class_name}}.h"
@@ -53,9 +51,9 @@ namespace {{namespace}} {
 #pragma diag_suppress 177
 #endif
 #endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
 {{kernel|generate_definitions(target)}}
-
+//NOLINTEND(readability-non-const-parameter*)
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
@@ -85,6 +83,7 @@ void {{class_name}}::run_impl(
    uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
 
    {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
+   {{kernel|generate_timestep_advancements|indent(4)}}
    {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize'], ignore_fields=True)|indent(4) }}
    {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
 }
diff --git a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp
index d56ec573032eaddba9ba9b959883a864a3f3ce63..0191994f3f3a29ef9384b2a2270294be9df59f43 100644
--- a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp
+++ b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp
@@ -1,3 +1,22 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author pystencils
+//======================================================================================================================
+
 #include "stencil/Directions.h"
 #include "core/cell/CellInterval.h"
 #include "core/DataTypes.h"
diff --git a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h
index d25c04b2b782fe891de361356aa046554d32f1ae..66114de6ee87d58f37d08ef2e39251a2f1060717 100644
--- a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h
+++ b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h
@@ -1,3 +1,22 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
 #pragma once
 #include "stencil/Directions.h"
 #include "core/cell/CellInterval.h"
diff --git a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp
index 054d589ecbc43addfbd20a6009c65d873f56e802..19b7b11ed507f8f068a3deb5908a1ca6fe867711 100644
--- a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp
+++ b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp
@@ -1,10 +1,22 @@
-#include "core/DataTypes.h"
-#include "core/cell/CellInterval.h"
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author pystencils
+//======================================================================================================================
 
-#include "stencil/Directions.h"
-
-#include "gpu/GPUField.h"
-#include "gpu/GPUWrapper.h"
 #include "{{class_name}}.h"
 
 {% if target is equalto 'cpu' -%}
diff --git a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h
index 2b182905cd8584794ba53108f072f1da5abb37bc..b301bced5b8bd159c028e6e75c26fd37df5a63b2 100644
--- a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h
+++ b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h
@@ -1,4 +1,24 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
 #pragma once
+
 #include "core/DataTypes.h"
 #include "core/cell/CellInterval.h"
 
@@ -36,9 +56,13 @@ public:
     {};
     virtual ~{{class_name}}() {}
 
-    virtual void pack  (stencil::Direction dir, unsigned char * buffer, IBlock * block, gpuStream_t stream);
-    virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, gpuStream_t stream);
-    virtual uint_t size  (stencil::Direction dir, IBlock * block);
+    void pack  (stencil::Direction dir, unsigned char * buffer, IBlock * block, gpuStream_t stream) override;
+    void communicateLocal  ( stencil::Direction /*dir*/, const IBlock* /* sender */, IBlock* /* receiver */, gpuStream_t /* stream */ ) override
+    {
+       WALBERLA_ABORT("Local Communication not implemented yet for standard PackInfos. To run your application turn off local communication in the Communication class")
+    }
+    void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, gpuStream_t stream) override;
+    uint_t size  (stencil::Direction dir, IBlock * block) override;
 
 private:
     {{fused_kernel|generate_members(parameters_to_ignore=['buffer'])|indent(4)}}
diff --git a/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h b/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h
index 3f9cbb2e659f58eb0b6ae1ff7dcb0e5b1cf0a8e5..860ea49717b76efbe205698a1eb14ed3c0d71797 100644
--- a/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h
+++ b/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h
@@ -1,3 +1,22 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
 #pragma once
 
 #include "core/debug/Debug.h"
diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.cpp b/python/pystencils_walberla/templates/Sweep.tmpl.cpp
index 10e180fb56a916f79352660fd0bbdd0c3b136c01..8f3e14e59074a2f483fe14c5f85eb3e352c0a836 100644
--- a/python/pystencils_walberla/templates/Sweep.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Sweep.tmpl.cpp
@@ -14,8 +14,7 @@
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
 //! \\file {{class_name}}.cpp
-//! \\ingroup lbm
-//! \\author lbmpy
+//! \\author pystencils
 //======================================================================================================================
 
 #include <cmath>
diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.h b/python/pystencils_walberla/templates/Sweep.tmpl.h
index 599ade337a0b9ebf08d67d247bc4e0474c2b7ead..e0b773ab1b1ab656a8db81ae10459d01b84766a9 100644
--- a/python/pystencils_walberla/templates/Sweep.tmpl.h
+++ b/python/pystencils_walberla/templates/Sweep.tmpl.h
@@ -19,6 +19,7 @@
 
 #pragma once
 #include "core/DataTypes.h"
+#include "core/logging/Logging.h"
 
 {% if target is equalto 'cpu' -%}
 #include "field/GhostLayerField.h"
diff --git a/python/pystencils_walberla/templates/SweepCollection.tmpl.cpp b/python/pystencils_walberla/templates/SweepCollection.tmpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a9a1c28434bff3d257ca2bf9c76bd4fa20d9f1db
--- /dev/null
+++ b/python/pystencils_walberla/templates/SweepCollection.tmpl.cpp
@@ -0,0 +1,69 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author pystencils
+//======================================================================================================================
+#include "{{class_name}}.h"
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+{%- endif %}
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable :  1599 )
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace {{namespace}} {
+
+{% for kernel in kernels %}
+{{kernel['kernel']|generate_definitions(target, kernel['max_threads'])}}
+{% endfor %}
+
+
+{% for kernel in kernels %}
+void {{class_name}}::{{kernel['function_name']}}( {{kernel['kernel']|generate_plain_parameter_list(ghost_layers=True)}} )
+{
+   {{kernel['kernel']|generate_call(ghost_layers_to_include=kernel['ghost_layers_to_include'], stream='stream')|indent(3)}}
+}
+void {{class_name}}::{{kernel['function_name']}}CellInterval( {{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci')}})
+{
+   {{kernel['kernel']|generate_call(stream='stream', cell_interval='ci')|indent(3)}}
+}
+{% endfor %}
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
diff --git a/python/pystencils_walberla/templates/SweepCollection.tmpl.h b/python/pystencils_walberla/templates/SweepCollection.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..5db4ccb33457efcc2f9f9385d0f2b32db35aef5e
--- /dev/null
+++ b/python/pystencils_walberla/templates/SweepCollection.tmpl.h
@@ -0,0 +1,298 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+#include "core/Macros.h"
+
+{% if target is equalto 'gpu' -%}
+#include "gpu/GPUField.h"
+#include "gpu/ParallelStreams.h"
+{%- endif %}
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/SwapableCompare.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <cmath>
+
+{% for header in headers %}
+#include {{header}}
+{% endfor %}
+
+using namespace std::placeholders;
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#   pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+
+
+class {{class_name}}
+{
+public:
+  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+
+   {{class_name}}(const shared_ptr< StructuredBlockStorage > & blocks, {{kernel_list|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
+     : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
+   {
+      {{kernel_list|generate_constructor(parameter_registration=parameter_scaling) |indent(6)}}
+
+      for (auto& iBlock : *blocks)
+      {
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
+             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
+             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+      }
+   };
+
+   {{ kernel_list| generate_destructor(class_name) |indent(4) }}
+
+   /*************************************************************************************
+   *                Internal Function Definitions with raw Pointer
+   *************************************************************************************/
+
+   {%- for kernel in kernels %}
+   static void {{kernel['function_name']}} ({{kernel['kernel']|generate_plain_parameter_list(ghost_layers=0, stream="nullptr")}});
+   static void {{kernel['function_name']}}CellInterval ({{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci', stream="nullptr")}});
+   {% endfor %}
+
+   /*************************************************************************************
+   *                Function Definitions for external Usage
+   *************************************************************************************/
+
+   {%- for kernel in kernels %}
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}()
+   {
+      return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+   }
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", ] | type_identifier_list -}})
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+         case Type::OUTER:
+            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+         default:
+            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+      }
+   }
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers"] | type_identifier_list -}})
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+         case Type::OUTER:
+            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+         default:
+            return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
+      }
+   }
+
+   {% if target is equalto 'gpu' -%}
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}})
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+         case Type::OUTER:
+            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+         default:
+            return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
+      }
+   }
+
+   std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "gpuStream_t gpuStream"] | type_identifier_list -}})
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+         case Type::OUTER:
+            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+         default:
+            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
+      }
+   }
+   {%- endif %}
+
+   void {{kernel['function_name']}}({{- ["IBlock * block",] | type_identifier_list -}})
+   {
+      const cell_idx_t ghost_layers = 0;
+      {% if target is equalto 'gpu' -%}
+      gpuStream_t gpuStream = nullptr;
+      {%- endif %}
+
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+
+   void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers"] | type_identifier_list -}})
+   {
+      {% if target is equalto 'gpu' -%}
+      gpuStream_t gpuStream = nullptr;
+      {%- endif %}
+
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+
+   {% if target is equalto 'gpu' -%}
+   void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}})
+   {
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+   {%- endif %}
+
+   void {{kernel['function_name']}}CellInterval({{- ["IBlock * block", "const CellInterval & ci", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
+   {
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+      {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+      {{kernel['kernel']|generate_swaps|indent(6)}}
+   }
+
+   void {{kernel['function_name']}}Inner({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
+   {
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements(advance=False)|indent(6)}}
+
+      CellInterval inner = {{kernel['field']}}->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='inner')}});
+   }
+
+   void {{kernel['function_name']}}Outer({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
+   {
+
+      {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+      {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+      {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+
+      if( layers_.empty() )
+      {
+         CellInterval ci;
+
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    {%if target is equalto 'gpu'%}
+      {
+         auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
+         for( auto & ci: layers_ )
+         {
+          parallelSection_.run([&]( auto s ) {
+             {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+          });
+         }
+      }
+    {% else %}
+      for( auto & ci: layers_ )
+      {
+         {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+      }
+    {% endif %}
+
+    {{kernel['kernel']|generate_swaps|indent(9)}}
+   }
+   {% endfor %}
+
+   {%if target is equalto 'gpu'%}
+   void setOuterPriority(int priority)
+   {
+      parallelStreams_.setStreamPriority(priority);
+   }
+   {%endif%}
+
+   private:
+      shared_ptr< StructuredBlockStorage > blocks_;
+      {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
+
+      Cell outerWidth_;
+      std::vector<CellInterval> layers_;
+
+      {%if target is equalto 'gpu' -%}
+      gpu::ParallelStreams parallelStreams_;
+      // std::map<BlockID, gpuStream_t > streams_;
+      {%- endif %}
+};
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
diff --git a/python/pystencils_walberla/utility.py b/python/pystencils_walberla/utility.py
index c109265ef3e5b0f16ff8f9c276394422d096097f..f19a0997497e9659a8c37cb81ba0db85472e7b22 100644
--- a/python/pystencils_walberla/utility.py
+++ b/python/pystencils_walberla/utility.py
@@ -1,9 +1,17 @@
 from os import path
-from pystencils.typing import get_base_type
-from pystencils_walberla.cmake_integration import CodeGenerationContext
+from functools import reduce
+from typing import Union, Dict, DefaultDict
+import warnings
+
+from pystencils import CreateKernelConfig, Target
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
+from pystencils.boundaries.createindexlist import boundary_index_array_coordinate_names, direction_member_name
+from pystencils.typing import BasicType, create_type, get_base_type
 
 from lbmpy import LBStencil
 
+from pystencils_walberla.cmake_integration import CodeGenerationContext
+
 HEADER_EXTENSIONS = {'.h', '.hpp'}
 
 
@@ -59,6 +67,145 @@ def generate_info_header(ctx: CodeGenerationContext,
     ctx.write_file(filename, lines + additional_code)
 
 
+def get_vectorize_instruction_set(ctx: CodeGenerationContext):
+    """returns a list of supported vector instruction sets. If waLBerla is not build with
+       `WALBERLA_OPTIMIZE_FOR_LOCALHOST` `None` is returned.
+
+    Args:
+        ctx: Code Generation Context
+    """
+
+    if ctx.optimize_for_localhost:
+        supported_instruction_sets = get_supported_instruction_sets()
+        if supported_instruction_sets:
+            return supported_instruction_sets[-1]
+        else:  # if cpuinfo package is not installed
+            warnings.warn("Could not obtain supported vectorization instruction sets - defaulting to sse. "
+                          "This problem can probably be fixed by installing py-cpuinfo. This package can "
+                          "gather the needed hardware information.")
+            return 'sse'
+    else:
+        return None
+
+
+def config_from_context(ctx: CodeGenerationContext, target: Target = Target.CPU,
+                        data_type: Union[type, str, DefaultDict[str, BasicType], Dict[str, BasicType]] = None,
+                        cpu_openmp: Union[bool, int] = None, cpu_vectorize_info: Dict = None,
+                        **kwargs) -> CreateKernelConfig:
+    """Creates a :class: `pystencils.config.CreateKernelConfig` from the code generation context. By default,
+       all arguments are determined by the generation context. This means, for example, that if
+       `WALBERLA_BUILD_WITH_GPU_SUPPORT` is `True`, the kernel will be generated for GPU using either CUDA or HIP.
+
+    Args:
+        ctx: Code Generation Context
+        target: All targets are defined in :class:`pystencils.enums.Target`
+        data_type: Data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name to
+                   type. If specified as a dict ideally a defaultdict is used to define a default value for symbols
+                   not listed in the dict. If a plain dict is provided it will be transformed into a defaultdict
+                   internally. The default value will then be determined via type collation.
+        cpu_openmp: `True` or number of threads for OpenMP parallelization, `False` for no OpenMP.
+                     If set to `True`, the maximum number of available threads will be chosen.
+        cpu_vectorize_info: A dictionary with keys 'instruction_set', 'assume_aligned' and 'nontemporal';
+                            for documentation of these parameters see vectorize function. Example:
+                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
+        kwargs: keyword arguments that can be taken by :class: `pystencils.config.CreateKernelConfig`
+    """
+
+    if target == Target.GPU and not ctx.gpu:
+        raise ValueError("can not generate gpu code if waLBerla is not build with GPU support. Please use "
+                         "-DWALBERLA_BUILD_WITH_CUDA=1 or -DWALBERLA_BUILD_WITH_HIP=1 for configuring cmake")
+
+    default_dtype = "float64" if ctx.double_accuracy else "float32"
+    if data_type is None:
+        data_type = default_dtype
+
+    if cpu_openmp and not ctx.openmp:
+        warnings.warn("Code is generated with OpenMP pragmas but waLBerla is not build with OpenMP. "
+                      "The compilation might not work due to wrong compiler flags. "
+                      "Please use -DWALBERLA_BUILD_WITH_OPENMP=1 for configuring cmake")
+
+    if cpu_openmp is None:
+        cpu_openmp = ctx.openmp
+
+    if cpu_vectorize_info is None:
+        cpu_vectorize_info = {}
+
+    default_vec_is = get_vectorize_instruction_set(ctx)
+
+    cpu_vectorize_info['instruction_set'] = cpu_vectorize_info.get('instruction_set', default_vec_is)
+    cpu_vectorize_info['assume_inner_stride_one'] = cpu_vectorize_info.get('assume_inner_stride_one', True)
+    cpu_vectorize_info['assume_aligned'] = cpu_vectorize_info.get('assume_aligned', False)
+    cpu_vectorize_info['nontemporal'] = cpu_vectorize_info.get('nontemporal', False)
+    cpu_vectorize_info['assume_sufficient_line_padding'] = cpu_vectorize_info.get('assume_sufficient_line_padding',
+                                                                                  False)
+
+    config = CreateKernelConfig(target=target, data_type=data_type, default_number_float=data_type,
+                                cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info,
+                                **kwargs)
+
+    return config
+
+
+def merge_sorted_lists(lx, ly, sort_key=lambda x: x, identity_check_key=None):
+    if identity_check_key is None:
+        identity_check_key = sort_key
+    nx = len(lx)
+    ny = len(ly)
+
+    def recursive_merge(lx_intern, ly_intern, ix_intern, iy_intern):
+        if ix_intern == nx:
+            return ly_intern[iy_intern:]
+        if iy_intern == ny:
+            return lx_intern[ix_intern:]
+        x = lx_intern[ix_intern]
+        y = ly_intern[iy_intern]
+        skx = sort_key(x)
+        sky = sort_key(y)
+        if skx == sky:
+            if identity_check_key(x) == identity_check_key(y):
+                return [x] + recursive_merge(lx_intern, ly_intern, ix_intern + 1, iy_intern + 1)
+            else:
+                raise ValueError(f'Elements <{x}> and <{y}> with equal sort key where not identical!')
+        elif skx < sky:
+            return [x] + recursive_merge(lx_intern, ly_intern, ix_intern + 1, iy_intern)
+        else:
+            return [y] + recursive_merge(lx_intern, ly_intern, ix_intern, iy_intern + 1)
+    return recursive_merge(lx, ly, 0, 0)
+
+
+def merge_lists_of_symbols(lists):
+    def merger(lx, ly):
+        return merge_sorted_lists(lx, ly, sort_key=lambda x: x.symbol.name, identity_check_key=lambda x: x.symbol)
+    return reduce(merger, lists)
+
+
+def struct_from_numpy_dtype(struct_name, numpy_dtype):
+    result = f"struct {struct_name} {{ \n"
+
+    equality_compare = []
+    constructor_params = []
+    constructor_initializer_list = []
+    for name, (sub_type, offset) in numpy_dtype.fields.items():
+        pystencils_type = create_type(sub_type)
+        result += f"    {pystencils_type} {name};\n"
+        if name in boundary_index_array_coordinate_names or name == direction_member_name:
+            constructor_params.append(f"{pystencils_type} {name}_")
+            constructor_initializer_list.append(f"{name}({name}_)")
+        else:
+            constructor_initializer_list.append(f"{name}()")
+        if pystencils_type.is_float():
+            equality_compare.append(f"floatIsEqual({name}, o.{name})")
+        else:
+            equality_compare.append(f"{name} == o.{name}")
+
+    result += "    %s(%s) : %s {}\n" % \
+              (struct_name, ", ".join(constructor_params), ", ".join(constructor_initializer_list))
+    result += "    bool operator==(const %s & o) const {\n        return %s;\n    }\n" % \
+              (struct_name, " && ".join(equality_compare))
+    result += "};\n"
+    return result
+
+
 #   ------------------------------------- INTERNAL -------------------------------------------------------------
 
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 891a92988bbd994f3d69c4deacac7ff08ce46362..92b465e32b32f8ec8396f8b6fb08767daadfa146 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -37,6 +37,7 @@ add_subdirectory( gather )
 add_subdirectory( geometry )
 add_subdirectory( gui )
 add_subdirectory( lbm )
+add_subdirectory( lbm_generated )
 add_subdirectory( lbm_mesapd_coupling )
 add_subdirectory( mesa_pd )
 if( OPENMESH_FOUND )
diff --git a/src/blockforest/Block.h b/src/blockforest/Block.h
index 64c7dafa70efecb428807de242ddf165e4417023..a61de6ac5c898c0ea1f8bb6a28f0a7b7f33fe002 100644
--- a/src/blockforest/Block.h
+++ b/src/blockforest/Block.h
@@ -270,21 +270,21 @@ inline bool Block::neighborhoodSectionHasSmallerBlocks( const uint_t sectionInde
 {
    WALBERLA_ASSERT_LESS( sectionIndex, 26 );
 
-   return !neighborhoodSection_[sectionIndex].empty() && neighborhoodSection_[sectionIndex][0]->id_.getUsedBits() > id_.getUsedBits();
+   return neighborhoodSectionHasBlocks(sectionIndex) && neighborhoodSection_[sectionIndex][0]->id_.getUsedBits() > id_.getUsedBits();
 }
 
 inline bool Block::neighborhoodSectionHasEquallySizedBlock( const uint_t sectionIndex ) const
 {
    WALBERLA_ASSERT_LESS( sectionIndex, 26 );
 
-   return !neighborhoodSection_[sectionIndex].empty() && neighborhoodSection_[sectionIndex][0]->id_.getUsedBits() == id_.getUsedBits();
+   return neighborhoodSectionHasBlocks(sectionIndex) && neighborhoodSection_[sectionIndex][0]->id_.getUsedBits() == id_.getUsedBits();
 }
 
 inline bool Block::neighborhoodSectionHasLargerBlock( const uint_t sectionIndex ) const
 {
    WALBERLA_ASSERT_LESS( sectionIndex, 26 );
 
-   return !neighborhoodSection_[sectionIndex].empty() && neighborhoodSection_[sectionIndex][0]->id_.getUsedBits() < id_.getUsedBits();
+   return neighborhoodSectionHasBlocks(sectionIndex) && neighborhoodSection_[sectionIndex][0]->id_.getUsedBits() < id_.getUsedBits();
 }
 
 
diff --git a/src/blockforest/BlockDataHandling.h b/src/blockforest/BlockDataHandling.h
index 7f56467c06b9eebf033753847020843a14a264c3..71e0138be0dd8c3b5af16769c32ffc3ac6e9f386 100644
--- a/src/blockforest/BlockDataHandling.h
+++ b/src/blockforest/BlockDataHandling.h
@@ -122,65 +122,65 @@ public:
    
    BlockData * initialize( IBlock * const block ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       T * ptr = dataHandling_->initialize( block );
       return ptr ? new BlockData( ptr ) : nullptr;
    }
    
    void serialize( IBlock * const block, const BlockDataID & id, mpi::SendBuffer & buffer ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->serialize( block, id, buffer );
    }
    
    void serializeCoarseToFine( Block * const block, const BlockDataID & id, mpi::SendBuffer & buffer, const uint_t child ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->serializeCoarseToFine( block, id, buffer, child );
    }
    
    void serializeFineToCoarse( Block * const block, const BlockDataID & id, mpi::SendBuffer & buffer ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->serializeFineToCoarse( block, id, buffer );
    }
    
    BlockData * deserialize( IBlock * const block ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       T * ptr = dataHandling_->deserialize( block );
       return ptr ? new BlockData( ptr ) : nullptr;
    }
    
    BlockData * deserializeCoarseToFine( Block * const block ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       T * ptr = dataHandling_->deserializeCoarseToFine( block );
       return ptr ? new BlockData( ptr ) : nullptr;
    }
    
    BlockData * deserializeFineToCoarse( Block * const block ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       T * ptr = dataHandling_->deserializeFineToCoarse( block );
       return ptr ? new BlockData( ptr ) : nullptr;
    }
    
    void deserialize( IBlock * const block, const BlockDataID & id, mpi::RecvBuffer & buffer ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->deserialize( block, id, buffer );
    }
    
    void deserializeCoarseToFine( Block * const block, const BlockDataID & id, mpi::RecvBuffer & buffer ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->deserializeCoarseToFine( block, id, buffer );
    }   
    
    void deserializeFineToCoarse( Block * const block, const BlockDataID & id, mpi::RecvBuffer & buffer, const uint_t child ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->deserializeFineToCoarse( block, id, buffer, child );
    }
    
diff --git a/src/blockforest/communication/NonUniformBufferedScheme.h b/src/blockforest/communication/NonUniformBufferedScheme.h
index caf91651c578ddc7da5bea5b8a67398e8cc590ee..be27a51ec805285144983d2d3a3618c502596d50 100644
--- a/src/blockforest/communication/NonUniformBufferedScheme.h
+++ b/src/blockforest/communication/NonUniformBufferedScheme.h
@@ -65,10 +65,10 @@ public:
    //**Construction & Destruction***************************************************************************************
    /*! \name Construction & Destruction */
    //@{
-   explicit NonUniformBufferedScheme( weak_ptr<StructuredBlockForest> bf,
+   explicit NonUniformBufferedScheme( const weak_ptr<StructuredBlockForest>& bf,
                                       const int baseTag = 778 ); // waLBerla = 119+97+76+66+101+114+108+97
 
-   NonUniformBufferedScheme( weak_ptr<StructuredBlockForest> bf,
+   NonUniformBufferedScheme( const weak_ptr<StructuredBlockForest>& bf,
                              const Set<SUID> & requiredBlockSelectors, 
                              const Set<SUID> & incompatibleBlockSelectors,
                              const int baseTag = 778 ); // waLBerla = 119+97+76+66+101+114+108+97
@@ -96,6 +96,16 @@ public:
    inline void communicateEqualLevel  ( const uint_t level );
    inline void communicateCoarseToFine( const uint_t fineLevel );
    inline void communicateFineToCoarse( const uint_t fineLevel );
+
+   std::function<void()>  communicateEqualLevelFunctor(const uint_t level) {
+      return [level, this](){ NonUniformBufferedScheme::communicateEqualLevel(level);};
+   }
+   std::function<void()>  communicateCoarseToFineFunctor(const uint_t fineLevel) {
+      return [fineLevel, this](){ NonUniformBufferedScheme::communicateCoarseToFine(fineLevel);};
+   }
+   std::function<void()>  communicateFineToCoarseFunctor(const uint_t fineLevel) {
+      return [fineLevel, this](){ NonUniformBufferedScheme::communicateFineToCoarse(fineLevel);};
+   }
    //@}
    //*******************************************************************************************************************
    
@@ -190,7 +200,7 @@ protected:
 
 
 template< typename Stencil >
-NonUniformBufferedScheme<Stencil>::NonUniformBufferedScheme( weak_ptr<StructuredBlockForest> bf, const int baseTag )
+NonUniformBufferedScheme<Stencil>::NonUniformBufferedScheme( const weak_ptr<StructuredBlockForest>& bf, const int baseTag )
    : blockForest_( bf ), localMode_( START ), baseTag_( baseTag ),
      requiredBlockSelectors_( Set<SUID>::emptySet() ), incompatibleBlockSelectors_( Set<SUID>::emptySet() )
 {
@@ -200,7 +210,7 @@ NonUniformBufferedScheme<Stencil>::NonUniformBufferedScheme( weak_ptr<Structured
 
 
 template< typename Stencil >
-NonUniformBufferedScheme<Stencil>::NonUniformBufferedScheme( weak_ptr<StructuredBlockForest> bf,
+NonUniformBufferedScheme<Stencil>::NonUniformBufferedScheme( const weak_ptr<StructuredBlockForest>& bf,
                                                              const Set<SUID> & requiredBlockSelectors, 
                                                              const Set<SUID> & incompatibleBlockSelectors,
                                                              const int baseTag /*= 778*/ ) // waLBerla = 119+97+76+66+101+114+108+97
@@ -236,10 +246,10 @@ void NonUniformBufferedScheme<Stencil>::init()
 template< typename Stencil >
 void NonUniformBufferedScheme<Stencil>::refresh()
 {
-   WALBERLA_ASSERT( !isAnyCommunicationInProgress() );
+   WALBERLA_ASSERT( !isAnyCommunicationInProgress() )
 
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levels = forest->getNumberOfLevels();
 
    for( uint_t i = 0; i != 3; ++i )
@@ -296,7 +306,7 @@ inline void NonUniformBufferedScheme<Stencil>::addPackInfo( const PackInfo & pac
 {
    if( isAnyCommunicationInProgress() )
    {
-      WALBERLA_ABORT( "You may not add a PackInfo to a NonUniformBufferedScheme if any communication is in progress!" );
+      WALBERLA_ABORT( "You may not add a PackInfo to a NonUniformBufferedScheme if any communication is in progress!" )
    }
 
    packInfos_.push_back( packInfo );
@@ -381,7 +391,7 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::startCommunicateEqualLevel()
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levelIndex = forest->getNumberOfLevels();
 
    if( forestModificationStamp_ != forest->getBlockForest().getModificationStamp() )
@@ -400,7 +410,7 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::startCommunicateCoarseToFine()
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levelIndex = forest->getNumberOfLevels();
 
    if( levelIndex == 1 )
@@ -421,7 +431,7 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::startCommunicateFineToCoarse()
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levelIndex = forest->getNumberOfLevels();
    
    if( levelIndex == 1 )
@@ -442,8 +452,8 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::startCommunicateEqualLevel( const uint_t level )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
-   WALBERLA_ASSERT_LESS( level, forest->getNumberOfLevels() );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
+   WALBERLA_ASSERT_LESS( level, forest->getNumberOfLevels() )
 
    if( forestModificationStamp_ != forest->getBlockForest().getModificationStamp() )
       refresh();
@@ -460,9 +470,9 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::startCommunicateCoarseToFine( const uint_t fineLevel )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
-   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) );
-   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
+   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) )
+   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() )
 
    if( forestModificationStamp_ != forest->getBlockForest().getModificationStamp() )
       refresh();
@@ -479,9 +489,9 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::startCommunicateFineToCoarse( const uint_t fineLevel )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
-   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) );
-   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
+   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) )
+   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() )
 
    if( forestModificationStamp_ != forest->getBlockForest().getModificationStamp() )
       refresh();
@@ -498,10 +508,10 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::waitCommunicateEqualLevel()
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levelIndex = forest->getNumberOfLevels();
-   WALBERLA_ASSERT_EQUAL( levelIndex, bufferSystem_[EQUAL_LEVEL].size() - uint_t(1) );
-   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+   WALBERLA_ASSERT_EQUAL( levelIndex, bufferSystem_[EQUAL_LEVEL].size() - uint_t(1) )
+   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() )
 
    wait( EQUAL_LEVEL, levelIndex );
 }
@@ -512,10 +522,10 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::waitCommunicateCoarseToFine()
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levelIndex = forest->getNumberOfLevels();
-   WALBERLA_ASSERT_EQUAL( levelIndex, bufferSystem_[COARSE_TO_FINE].size() - uint_t(1) );
-   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+   WALBERLA_ASSERT_EQUAL( levelIndex, bufferSystem_[COARSE_TO_FINE].size() - uint_t(1) )
+   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() )
 
    if( levelIndex == 1 )
       return;
@@ -529,10 +539,10 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::waitCommunicateFineToCoarse()
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
    const uint_t levelIndex = forest->getNumberOfLevels();
-   WALBERLA_ASSERT_EQUAL( levelIndex, bufferSystem_[FINE_TO_COARSE].size() - uint_t(1) );
-   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+   WALBERLA_ASSERT_EQUAL( levelIndex, bufferSystem_[FINE_TO_COARSE].size() - uint_t(1) )
+   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() )
 
    if( levelIndex == 1 )
       return;
@@ -546,10 +556,10 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::waitCommunicateEqualLevel  ( const uint_t level )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
-   WALBERLA_ASSERT_LESS( level, forest->getNumberOfLevels() );
-   WALBERLA_ASSERT_EQUAL( forest->getNumberOfLevels(), bufferSystem_[EQUAL_LEVEL].size() - uint_t(1) );
-   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
+   WALBERLA_ASSERT_LESS( level, forest->getNumberOfLevels() )
+   WALBERLA_ASSERT_EQUAL( forest->getNumberOfLevels(), bufferSystem_[EQUAL_LEVEL].size() - uint_t(1) )
+   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() )
 
    wait( EQUAL_LEVEL, level );
 }
@@ -560,11 +570,11 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::waitCommunicateCoarseToFine( const uint_t fineLevel )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
-   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) );
-   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() );
-   WALBERLA_ASSERT_EQUAL( forest->getNumberOfLevels(), bufferSystem_[COARSE_TO_FINE].size() - uint_t(1) );
-   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
+   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) )
+   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() )
+   WALBERLA_ASSERT_EQUAL( forest->getNumberOfLevels(), bufferSystem_[COARSE_TO_FINE].size() - uint_t(1) )
+   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() )
 
    wait( COARSE_TO_FINE, fineLevel );
 }
@@ -575,11 +585,11 @@ template< typename Stencil >
 inline void NonUniformBufferedScheme<Stencil>::waitCommunicateFineToCoarse( const uint_t fineLevel )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
-   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) );
-   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() );
-   WALBERLA_ASSERT_EQUAL( forest->getNumberOfLevels(), bufferSystem_[FINE_TO_COARSE].size() - uint_t(1) );
-   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
+   WALBERLA_ASSERT_GREATER( fineLevel, uint_t(0) )
+   WALBERLA_ASSERT_LESS( fineLevel, forest->getNumberOfLevels() )
+   WALBERLA_ASSERT_EQUAL( forest->getNumberOfLevels(), bufferSystem_[FINE_TO_COARSE].size() - uint_t(1) )
+   WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() )
 
    wait( FINE_TO_COARSE, fineLevel );
 }
@@ -619,7 +629,7 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationEqualLevel( const uint
       std::map< uint_t, std::vector< SendBufferFunction > > sendFunctions;
 
       auto forest = blockForest_.lock();
-      WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+      WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
 
       for( auto it = forest->begin(); it != forest->end(); ++it )
       {
@@ -638,7 +648,7 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationEqualLevel( const uint
             if( !( block->neighborhoodSectionHasEquallySizedBlock(neighborIdx) ) )
                continue;
 
-            WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize(neighborIdx), uint_t(1) );
+            WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize(neighborIdx), uint_t(1) )
 
             const BlockID & receiverId = block->getNeighborId( neighborIdx, uint_t(0) );
 
@@ -648,13 +658,13 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationEqualLevel( const uint
             if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) )
             {
                auto neighbor = dynamic_cast< Block * >( forest->getBlock(receiverId) );
-               WALBERLA_ASSERT_EQUAL( neighbor->getProcess(), block->getProcess() );
+               WALBERLA_ASSERT_EQUAL( neighbor->getProcess(), block->getProcess() )
 
                for( auto packInfo = packInfos_.begin(); packInfo != packInfos_.end(); ++packInfo )
                {
                   if( localMode_ == BUFFER )
                   {
-                     SendBuffer buffer;
+                     SendBuffer const buffer;
                      localBuffers.push_back( buffer );
                      const uint_t bufferIndex = uint_c( localBuffers.size() ) - uint_t(1);
 
@@ -745,7 +755,7 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationCoarseToFine( const ui
       std::set< uint_t > ranksToReceiveFrom;
 
       auto forest = blockForest_.lock();
-      WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+      WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
       for( auto it = forest->begin(); it != forest->end(); ++it )
       {
          Block * block = dynamic_cast< Block * >( it.get() );
@@ -774,13 +784,13 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationCoarseToFine( const ui
                   if( block->neighborExistsLocally( neighborIdx, n ) )
                   {
                      auto neighbor = dynamic_cast< Block * >( forest->getBlock(receiverId) );
-                     WALBERLA_ASSERT_EQUAL( neighbor->getProcess(), block->getProcess() );
+                     WALBERLA_ASSERT_EQUAL( neighbor->getProcess(), block->getProcess() )
 
                      for( auto packInfo = packInfos_.begin(); packInfo != packInfos_.end(); ++packInfo )
                      {
                         if( localMode_ == BUFFER )
                         {
-                           SendBuffer buffer;
+                           SendBuffer const buffer;
                            localBuffers.push_back( buffer );
                            const uint_t bufferIndex = uint_c( localBuffers.size() ) - uint_t(1);
 
@@ -829,7 +839,7 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationCoarseToFine( const ui
                const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex( *dir );
                if( block->neighborhoodSectionHasLargerBlock(neighborIdx) )
                {
-                  WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize(neighborIdx), uint_t(1) );
+                  WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize(neighborIdx), uint_t(1) )
                   if( block->neighborExistsRemotely( neighborIdx, uint_t(0) ) &&
                       selectable::isSetSelected( block->getNeighborState( neighborIdx, 0 ), requiredBlockSelectors_, incompatibleBlockSelectors_ ) )
                   {
@@ -890,7 +900,7 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationFineToCoarse( const ui
       std::set< uint_t > ranksToReceiveFrom;
 
       auto forest = blockForest_.lock();
-      WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+      WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
 
       for( auto it = forest->begin(); it != forest->end(); ++it )
       {
@@ -910,7 +920,7 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationFineToCoarse( const ui
                if( !( block->neighborhoodSectionHasLargerBlock(neighborIdx) ) )
                   continue;
 
-               WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize(neighborIdx), uint_t(1) );
+               WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize(neighborIdx), uint_t(1) )
 
                const BlockID & receiverId = block->getNeighborId( neighborIdx, uint_t(0) );
 
@@ -920,13 +930,13 @@ void NonUniformBufferedScheme<Stencil>::startCommunicationFineToCoarse( const ui
                if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) )
                {
                   auto neighbor = dynamic_cast< Block * >( forest->getBlock(receiverId) );
-                  WALBERLA_ASSERT_EQUAL( neighbor->getProcess(), block->getProcess() );
+                  WALBERLA_ASSERT_EQUAL( neighbor->getProcess(), block->getProcess() )
 
                   for( auto packInfo = packInfos_.begin(); packInfo != packInfos_.end(); ++packInfo )
                   {
                      if( localMode_ == BUFFER )
                      {
-                        SendBuffer buffer;
+                        SendBuffer const buffer;
                         localBuffers.push_back( buffer );
                         const uint_t bufferIndex = uint_c( localBuffers.size() ) - uint_t(1);
 
@@ -1144,7 +1154,7 @@ template< typename Stencil >
 void NonUniformBufferedScheme<Stencil>::receive( RecvBuffer & buffer )
 {
    auto forest = blockForest_.lock();
-   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" );
+   WALBERLA_CHECK_NOT_NULLPTR( forest, "Trying to access communication for a block storage object that doesn't exist anymore" )
 
    while( !buffer.isEmpty() )
    {
@@ -1183,7 +1193,7 @@ template< typename Stencil >
 void NonUniformBufferedScheme<Stencil>::localBufferPacking( const INDEX i, const uint_t j, const uint_t bufferIndex, const PackInfo & packInfo,
                                                             const Block * sender, const Block * receiver, const stencil::Direction & dir )
 {
-   WALBERLA_ASSERT_LESS( bufferIndex, localBuffers_[i][j].size() );
+   WALBERLA_ASSERT_LESS( bufferIndex, localBuffers_[i][j].size() )
 
    SendBuffer & buffer = localBuffers_[i][j][ bufferIndex ];
    buffer.clear();
@@ -1198,7 +1208,7 @@ void NonUniformBufferedScheme<Stencil>::localBufferPacking( const INDEX i, const
    }
    else
    {
-      WALBERLA_ASSERT( i == FINE_TO_COARSE );
+      WALBERLA_ASSERT( i == FINE_TO_COARSE )
       packInfo->packDataFineToCoarse( sender, receiver->getId(), dir, buffer );
    }
 }
@@ -1209,7 +1219,7 @@ template< typename Stencil >
 void NonUniformBufferedScheme<Stencil>::localBufferUnpacking( const INDEX i, const uint_t j, const uint_t bufferIndex, const PackInfo & packInfo,
                                                               Block * receiver, const Block * sender, const stencil::Direction & dir )
 {
-   WALBERLA_ASSERT_LESS( bufferIndex, localBuffers_[i][j].size() );
+   WALBERLA_ASSERT_LESS( bufferIndex, localBuffers_[i][j].size() )
 
    SendBuffer & sendBuffer = localBuffers_[i][j][ bufferIndex ];
    RecvBuffer recvBuffer( sendBuffer );
@@ -1224,7 +1234,7 @@ void NonUniformBufferedScheme<Stencil>::localBufferUnpacking( const INDEX i, con
    }
    else
    {
-      WALBERLA_ASSERT( i == FINE_TO_COARSE );
+      WALBERLA_ASSERT( i == FINE_TO_COARSE )
       packInfo->unpackDataFineToCoarse( receiver, sender->getId(), stencil::inverseDir[dir], recvBuffer );
    }
 }
diff --git a/src/blockforest/communication/NonUniformPackInfo.h b/src/blockforest/communication/NonUniformPackInfo.h
index 0b32369c654e4ca9642d88f5d85763f880b7e55d..73c3f760fbfb54b3af1be35fdd2d633e3495269e 100644
--- a/src/blockforest/communication/NonUniformPackInfo.h
+++ b/src/blockforest/communication/NonUniformPackInfo.h
@@ -106,13 +106,13 @@ protected:
 inline void NonUniformPackInfo::packDataEqualLevel( const Block * sender, stencil::Direction dir, mpi::SendBuffer & buffer ) const
 {
 #ifndef NDEBUG
-   size_t sizeBefore = buffer.size();
+   size_t const sizeBefore = buffer.size();
 #endif
 
    packDataEqualLevelImpl( sender, dir, buffer );
 
 #ifndef NDEBUG
-   size_t sizeAfter = buffer.size();
+   size_t const sizeAfter = buffer.size();
    if( constantDataExchange() )
    {
 #ifdef _OPENMP
@@ -125,7 +125,7 @@ inline void NonUniformPackInfo::packDataEqualLevel( const Block * sender, stenci
       if( dirEntry == sizeMap.end() )
          sizeMap[ uint_t(0) ] = sizeAfter - sizeBefore;
       else
-         WALBERLA_ASSERT_EQUAL( sizeMap[ uint_t(0) ], (sizeAfter - sizeBefore) );
+         WALBERLA_ASSERT_EQUAL( sizeMap[ uint_t(0) ], (sizeAfter - sizeBefore) )
 #ifdef _OPENMP
       }
 #endif
@@ -138,13 +138,13 @@ inline void NonUniformPackInfo::packDataEqualLevel( const Block * sender, stenci
 inline void NonUniformPackInfo::packDataCoarseToFine( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir, mpi::SendBuffer & buffer ) const
 {
 #ifndef NDEBUG
-   size_t sizeBefore = buffer.size();
+   size_t const sizeBefore = buffer.size();
 #endif
 
    packDataCoarseToFineImpl( coarseSender, fineReceiver, dir, buffer );
 
 #ifndef NDEBUG
-   size_t sizeAfter = buffer.size();
+   size_t const sizeAfter = buffer.size();
    if( constantDataExchange() )
    {
 #ifdef _OPENMP
@@ -157,7 +157,7 @@ inline void NonUniformPackInfo::packDataCoarseToFine( const Block * coarseSender
       if( dirEntry == sizeMap.end() )
          sizeMap[ fineReceiver.getBranchId() ] = sizeAfter - sizeBefore;
       else
-         WALBERLA_ASSERT_EQUAL( sizeMap[ fineReceiver.getBranchId() ], (sizeAfter - sizeBefore) );
+         WALBERLA_ASSERT_EQUAL( sizeMap[ fineReceiver.getBranchId() ], (sizeAfter - sizeBefore) )
 #ifdef _OPENMP
       }
 #endif
@@ -170,13 +170,13 @@ inline void NonUniformPackInfo::packDataCoarseToFine( const Block * coarseSender
 inline void NonUniformPackInfo::packDataFineToCoarse( const Block * fineSender, const BlockID & coarseReceiver, stencil::Direction dir, mpi::SendBuffer & buffer ) const
 {
 #ifndef NDEBUG
-   size_t sizeBefore = buffer.size();
+   size_t const sizeBefore = buffer.size();
 #endif
 
    packDataFineToCoarseImpl( fineSender, coarseReceiver, dir, buffer );
 
 #ifndef NDEBUG
-   size_t sizeAfter = buffer.size();
+   size_t const sizeAfter = buffer.size();
    if( constantDataExchange() )
    {
 #ifdef _OPENMP
@@ -189,7 +189,7 @@ inline void NonUniformPackInfo::packDataFineToCoarse( const Block * fineSender,
       if( dirEntry == sizeMap.end() )
          sizeMap[ uint_t(0) ] = sizeAfter - sizeBefore;
       else
-         WALBERLA_ASSERT_EQUAL( sizeMap[ uint_t(0) ], (sizeAfter - sizeBefore) );
+         WALBERLA_ASSERT_EQUAL( sizeMap[ uint_t(0) ], (sizeAfter - sizeBefore) )
 #ifdef _OPENMP
       }
 #endif
diff --git a/src/blockforest/communication/UniformBufferedScheme.h b/src/blockforest/communication/UniformBufferedScheme.h
index 8677b5f83afe7c4ba8bb047fbd95724bc7ce3ac3..7bc813cc5067a8d4336bc376dc643314defa1816 100644
--- a/src/blockforest/communication/UniformBufferedScheme.h
+++ b/src/blockforest/communication/UniformBufferedScheme.h
@@ -314,7 +314,7 @@ void UniformBufferedScheme<Stencil>::startCommunication()
                {
                   if( localMode_ == BUFFER )
                   {
-                     SendBuffer buffer;
+                     SendBuffer const buffer;
                      localBuffers_.push_back( buffer );
                      const uint_t index = uint_c( localBuffers_.size() ) - uint_t(1);
 
diff --git a/src/communication/UniformPackInfo.h b/src/communication/UniformPackInfo.h
index 5ec6db29d32dff36713ab903498048b450f748f2..aa110f9bdf5c51b37a57572cfbc800b004ab37b6 100644
--- a/src/communication/UniformPackInfo.h
+++ b/src/communication/UniformPackInfo.h
@@ -153,13 +153,13 @@ protected:
 inline void UniformPackInfo::packData( const IBlock * sender, stencil::Direction dir, mpi::SendBuffer & buffer ) const
 {
 #ifndef NDEBUG
-   size_t sizeBefore = buffer.size();
+   size_t const sizeBefore = buffer.size();
 #endif
 
    packDataImpl( sender, dir, buffer );
 
 #ifndef NDEBUG
-   size_t sizeAfter = buffer.size();
+   size_t const sizeAfter = buffer.size();
    if( constantDataExchange() )
    {
 #ifdef _OPENMP
@@ -171,7 +171,7 @@ inline void UniformPackInfo::packData( const IBlock * sender, stencil::Direction
       if( dirEntry == blockMap.end() )
          blockMap[ dir ] = sizeAfter - sizeBefore;
       else
-         WALBERLA_ASSERT_EQUAL( blockMap[ dir ], (sizeAfter - sizeBefore) );
+         WALBERLA_ASSERT_EQUAL( blockMap[ dir ], (sizeAfter - sizeBefore) )
 #ifdef _OPENMP
       }
 #endif
diff --git a/src/core/cell/Cell.h b/src/core/cell/Cell.h
index 8f41297b78a1ff66d4cc7a9f39f98692d11d1b49..f531430ea1733cca2ccf650a944bd92301d623cd 100644
--- a/src/core/cell/Cell.h
+++ b/src/core/cell/Cell.h
@@ -50,9 +50,10 @@ public:
    //@{
    Cell() = default;
    inline Cell( const cell_idx_t _x, const cell_idx_t _y, const cell_idx_t _z ) { cell[0] = _x; cell[1] = _y; cell[2] = _z; }
- //inline Cell( const int        _x, const int        _y, const int        _z );
+   inline Cell( const Vector3<cell_idx_t> & _vec ) {cell[0] = _vec[0]; cell[1] = _vec[1]; cell[2] = _vec[2];}
+
    inline Cell( const uint_t     _x, const uint_t     _y, const uint_t     _z );
-   inline Cell( const Vector3<cell_idx_t>& vec ){ cell[0] = vec[0]; cell[1] = vec[1]; cell[2] = vec[2]; };
+   inline Cell( const Vector3<uint_t> _vec );
    //@}
 
    /*! \name Arithmetic operators */
@@ -102,15 +103,6 @@ std::ostream & operator<<( std::ostream & os, const Cell & cell );
 std::istream & operator>>( std::istream & is,       Cell & cell );
 //@}
 
-
-
-// inline Cell::Cell( const int _x, const int _y, const int _z ) {
-//
-//    x() = cell_idx_c( _x ); y() = cell_idx_c( _y ); z() = cell_idx_c( _z );
-// }
-
-
-
 inline Cell::Cell( const uint_t _x, const uint_t _y, const uint_t _z )
 {
    cell[0] = cell_idx_c( _x );
@@ -118,6 +110,12 @@ inline Cell::Cell( const uint_t _x, const uint_t _y, const uint_t _z )
    cell[2] = cell_idx_c( _z );
 }
 
+inline Cell::Cell( const Vector3<uint_t> _vec )
+{
+   cell[0] = cell_idx_c( _vec[0] );
+   cell[1] = cell_idx_c( _vec[1] );
+   cell[2] = cell_idx_c( _vec[2] );
+}
 
 
 /*******************************************************************************************************************//**
diff --git a/src/core/mpi/BufferSystem.h b/src/core/mpi/BufferSystem.h
index 6a531fa907e6f95ceb09d29b6ce31ecb9cbecf2e..04161810a408aab7bc0baf6ddb6159714333494a 100644
--- a/src/core/mpi/BufferSystem.h
+++ b/src/core/mpi/BufferSystem.h
@@ -151,7 +151,7 @@ public:
    void sendAll();
    void send( MPIRank rank );
 
-   iterator begin() { WALBERLA_ASSERT( communicationRunning_); return iterator( *this, true ); }
+   iterator begin() { WALBERLA_ASSERT( communicationRunning_) return iterator( *this, true ); }
    iterator end()   {                                          return iterator( *this, false); }
    //@}
    //*******************************************************************************************************************
@@ -190,7 +190,7 @@ public:
    //@{
    bool isSizeCommunicatedInNextStep() const { return (currentComm_ == &unknownSizeComm_); }
    bool isCommunicationRunning() const       { return communicationRunning_;               }
-   bool isReceiverInformationSet() const     { return currentComm_ != NULL;                }
+   bool isReceiverInformationSet() const     { return currentComm_ != nullptr;                }
    //@}
    //*******************************************************************************************************************
 
diff --git a/src/core/mpi/BufferSystem.impl.h b/src/core/mpi/BufferSystem.impl.h
index 4cbd884ba791bb4d07e3591dc9a6a0101ad657ff..183d29bd86b7412090916e0b2be92c3dc7a4c352 100644
--- a/src/core/mpi/BufferSystem.impl.h
+++ b/src/core/mpi/BufferSystem.impl.h
@@ -231,14 +231,14 @@ void GenericBufferSystem<Rb, Sb>::setReceiverInfo( const std::set<MPIRank> & ran
 template< typename Rb, typename Sb>
 void GenericBufferSystem<Rb, Sb>::setReceiverInfo( const std::map<MPIRank,MPISize> & ranksToRecvFrom )
 {
-   WALBERLA_ASSERT( ! communicationRunning_ );
+   WALBERLA_ASSERT( ! communicationRunning_ )
 
    recvInfos_.clear();
    for ( auto it = ranksToRecvFrom.begin(); it != ranksToRecvFrom.end(); ++it )
    {
       const MPIRank sender       = it->first;
       const MPISize senderSize   = it->second;
-      WALBERLA_ASSERT_GREATER( senderSize, 0 );
+      WALBERLA_ASSERT_GREATER( senderSize, 0 )
       recvInfos_[ sender ].size   = senderSize;
    }
 
diff --git a/src/core/mpi/BufferSystemHelper.h b/src/core/mpi/BufferSystemHelper.h
index 5603db56005c53eff6c7a5c5f2cbd369969b0c90..c505dfa0bd74f939500c1962458b4c1e2c9354fe 100644
--- a/src/core/mpi/BufferSystemHelper.h
+++ b/src/core/mpi/BufferSystemHelper.h
@@ -108,7 +108,7 @@ namespace internal {
       using typename AbstractCommunication<RecvBuffer_T, SendBuffer_T>::ReceiveInfo;
 
       KnownSizeCommunication( const MPI_Comm & communicator, int tag = 0 )
-           : AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ), sending_(false), receiving_(false) {}
+           : AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ){}
 
       ~KnownSizeCommunication() override = default;
 
@@ -121,8 +121,8 @@ namespace internal {
       MPIRank waitForNextReceive( std::map<MPIRank, ReceiveInfo> & recvInfos ) override;
 
    private:
-      bool sending_;
-      bool receiving_;
+      bool sending_{false};
+      bool receiving_{false};
 
       std::vector<MPI_Request> sendRequests_;
       std::vector<MPI_Request> recvRequests_;
@@ -136,7 +136,7 @@ namespace internal {
       using typename AbstractCommunication<RecvBuffer_T, SendBuffer_T>::ReceiveInfo;
 
       UnknownSizeCommunication( const MPI_Comm & communicator, int tag = 0 )
-           :  AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ), sending_(false), receiving_(false) {}
+           :  AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ){}
 
       ~UnknownSizeCommunication() override = default;
 
@@ -149,8 +149,8 @@ namespace internal {
       MPIRank waitForNextReceive( std::map<MPIRank, ReceiveInfo> & recvInfos ) override;
 
    private:
-      bool sending_;
-      bool receiving_;
+      bool sending_{false};
+      bool receiving_{false};
 
       std::vector<MPI_Request> sendRequests_;
       std::list<MPISize>       outgoingBufferForSizes_;
@@ -168,7 +168,7 @@ namespace internal {
       using typename AbstractCommunication<RecvBuffer_T, SendBuffer_T>::ReceiveInfo;
 
       UnknownSizeCommunicationIProbe( const MPI_Comm & communicator, int tag = 0 )
-           :  AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ), sending_(false), receiving_(false) {}
+           :  AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ){}
 
       ~UnknownSizeCommunicationIProbe() override = default;
 
@@ -181,8 +181,8 @@ namespace internal {
       MPIRank waitForNextReceive( std::map<MPIRank, ReceiveInfo> & recvInfos ) override;
 
    private:
-      bool sending_;
-      bool receiving_;
+      bool sending_{false};
+      bool receiving_{false};
       int  pendingReceives_;
 
       std::vector<MPI_Request> sendRequests_;
@@ -196,7 +196,7 @@ namespace internal {
       using typename AbstractCommunication<RecvBuffer_T, SendBuffer_T>::ReceiveInfo;
 
       NoMPICommunication( const MPI_Comm & communicator, int tag = 0 )
-         : AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ), received_( false ) {}
+         : AbstractCommunication<RecvBuffer_T, SendBuffer_T>( communicator, tag ){}
 
       ~NoMPICommunication() override = default;
 
@@ -209,7 +209,7 @@ namespace internal {
       MPIRank waitForNextReceive( std::map<MPIRank, ReceiveInfo> & recvInfos ) override;
 
    private:
-      bool         received_;
+      bool         received_{ false };
       RecvBuffer_T tmpBuffer_;
    };
 
diff --git a/src/core/timing/Timer.h b/src/core/timing/Timer.h
index 89568b4f5ca8c509168c2d24ff99984fe42babee..32c1e7f300be9455c3be537050d12f04c50aa7d1 100644
--- a/src/core/timing/Timer.h
+++ b/src/core/timing/Timer.h
@@ -500,7 +500,7 @@ shared_ptr<Timer<TP> > getReduced( Timer<TP>& timer, ReduceType rt, int targetRa
       break;
 
    default:
-      WALBERLA_ABORT( "Unknown reduce type" );
+      WALBERLA_ABORT( "Unknown reduce type" )
       break;
    }
 
diff --git a/src/core/timing/TimingPool.cpp b/src/core/timing/TimingPool.cpp
index dff973201aa5e3976576c60038200cba4492fd6b..7539fffe3610c4fb5e7cb0846b5192f5e7887e70 100644
--- a/src/core/timing/TimingPool.cpp
+++ b/src/core/timing/TimingPool.cpp
@@ -116,7 +116,7 @@ shared_ptr<TimingPool<TP> > TimingPool<TP>::getReduced( ReduceType rt, int targe
          break;
 
       default:
-         WALBERLA_ABORT( "Unknown reduce type" );
+         WALBERLA_ABORT( "Unknown reduce type" )
          break;
    }
 
diff --git a/src/domain_decomposition/BlockDataHandling.h b/src/domain_decomposition/BlockDataHandling.h
index 0720eb572ffadafc2deb78b3733e4cf5ff225029..56b18521f0c65656b3b09b4ec5ff0a430c39c312 100644
--- a/src/domain_decomposition/BlockDataHandling.h
+++ b/src/domain_decomposition/BlockDataHandling.h
@@ -92,21 +92,21 @@ public:
    
    void serialize( IBlock * const, const BlockDataID &, mpi::SendBuffer & ) override
    {
-      WALBERLA_ABORT( "You are trying to serialize a block data item for which only an initialization function was registered" );
+      WALBERLA_ABORT( "You are trying to serialize a block data item for which only an initialization function was registered" )
 #ifdef __IBMCPP__
-      return NULL; // never reached, helps to suppress a warning from the IBM compiler
+      return; // never reached, helps to suppress a warning from the IBM compiler
 #endif
    }
    T * deserialize( IBlock * const ) override
    {
-      WALBERLA_ABORT( "You are trying to deserialize a block data item for which only an initialization function was registered" );
+      WALBERLA_ABORT( "You are trying to deserialize a block data item for which only an initialization function was registered" )
 #ifdef __IBMCPP__
-      return NULL; // never reached, helps to suppress a warning from the IBM compiler
+      return nullptr; // never reached, helps to suppress a warning from the IBM compiler
 #endif
    }
    void deserialize( IBlock * const, const BlockDataID &, mpi::RecvBuffer & ) override
    {
-      WALBERLA_ABORT( "You are trying to deserialize a block data item for which only an initialization function was registered" );
+      WALBERLA_ABORT( "You are trying to deserialize a block data item for which only an initialization function was registered" )
    }
 
 private:
@@ -175,27 +175,27 @@ public:
   
    BlockData * initialize( IBlock * const block ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       T * ptr = dataHandling_->initialize( block );
       return ptr ? new BlockData( ptr ) : nullptr;
    }
    
    void serialize( IBlock * const block, const BlockDataID & id, mpi::SendBuffer & buffer ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->serialize( block, id, buffer );
    }
    
    BlockData * deserialize( IBlock * const block ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       T * ptr = dataHandling_->deserialize( block );
       return ptr ? new BlockData( ptr ) : nullptr;
    }
    
    void deserialize( IBlock * const block, const BlockDataID & id, mpi::RecvBuffer & buffer ) override
    {
-      WALBERLA_ASSERT_NOT_NULLPTR( block );
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
       dataHandling_->deserialize( block, id, buffer );
    }   
    
@@ -269,7 +269,7 @@ public:
                          " - block state: " << block->getState() << "\n"
                          " - global state: " << uid::globalState() << "\n"
                          " - additional state: " << state << "\n" 
-                         " - \"selector\": " << selection );
+                         " - \"selector\": " << selection )
       }
       
       return dataHandling;
diff --git a/src/domain_decomposition/IBlock.h b/src/domain_decomposition/IBlock.h
index 06e48b6905194a71ed3970550ca65ee525876080..ef563cc057b2e9157330420f09beb1ad74050168 100644
--- a/src/domain_decomposition/IBlock.h
+++ b/src/domain_decomposition/IBlock.h
@@ -110,7 +110,7 @@ public:
       WALBERLA_ABORT( "BlockData access type violation! (The block data you added is of a different type than the block data you are trying to access!)" )
 #endif
 #ifdef __IBMCPP__
-      return NULL; // never reached, helps to suppress a warning from the IBM compiler
+      return nullptr; // never reached, helps to suppress a warning from the IBM compiler
 #endif
    }
 
@@ -212,8 +212,6 @@ public:
    friend class           BlockStorage;
    friend class StructuredBlockStorage;
 
-public:
-
    virtual const IBlockID& getId() const = 0;
 
    bool operator==( const IBlock& rhs ) const;
@@ -466,7 +464,7 @@ inline const T* IBlock::uncheckedFastGetData( const ConstBlockDataID & index ) c
    WALBERLA_ASSERT_LESS( uint_t( index ), data_.size() );
 
    if( data_[index] == nullptr )
-      return NULL;
+      return nullptr;
 
    return data_[index]->template uncheckedFastGet< T >();
 }
diff --git a/src/domain_decomposition/StructuredBlockStorage.h b/src/domain_decomposition/StructuredBlockStorage.h
index 574634255f49d6f969c82a108c98f0dde7582132..146a5eadb5186fb283652c834d4579ca91482e73 100644
--- a/src/domain_decomposition/StructuredBlockStorage.h
+++ b/src/domain_decomposition/StructuredBlockStorage.h
@@ -284,9 +284,9 @@ public:
 
 
 
-   real_t dx( const uint_t level = 0 ) const { WALBERLA_ASSERT_LESS( level, dx_.size() ); return dx_[ level ]; } ///< cell size on level "level" in x direction
-   real_t dy( const uint_t level = 0 ) const { WALBERLA_ASSERT_LESS( level, dy_.size() ); return dy_[ level ]; } ///< cell size on level "level" in y direction
-   real_t dz( const uint_t level = 0 ) const { WALBERLA_ASSERT_LESS( level, dz_.size() ); return dz_[ level ]; } ///< cell size on level "level" in z direction
+   real_t dx( const uint_t level = 0 ) const { WALBERLA_ASSERT_LESS( level, dx_.size() ) return dx_[ level ]; } ///< cell size on level "level" in x direction
+   real_t dy( const uint_t level = 0 ) const { WALBERLA_ASSERT_LESS( level, dy_.size() ) return dy_[ level ]; } ///< cell size on level "level" in y direction
+   real_t dz( const uint_t level = 0 ) const { WALBERLA_ASSERT_LESS( level, dz_.size() ) return dz_[ level ]; } ///< cell size on level "level" in z direction
 
    void mapToPeriodicDomain( Cell& cell, const uint_t level = 0 ) const; // -> for documentation of this function see StructuredBlockStorage.cpp
 
@@ -354,7 +354,7 @@ public:
    /// Returns the block data ID required for accessing the cell bounding box of blocks - fails in debug mode if no block cell bounding boxes
    /// have been created via "createCellBoundingBoxes()". (remember: every block resides on exactly one grid level, and all blocks managed by a
    //  structured block storage are assigned a corresponding cell bounding box as block data once "createCellBoundingBoxes()" is called.)
-   inline ConstBlockDataID getBlockCellBBId() const { WALBERLA_ASSERT( blockCellBBCreated_ ); return blockCellBBId_; }
+   inline ConstBlockDataID getBlockCellBBId() const { WALBERLA_ASSERT( blockCellBBCreated_ ) return blockCellBBId_; }
 
    inline const CellInterval& getBlockCellBB( const IBlock& block ) const;
 
@@ -488,7 +488,7 @@ inline bool StructuredBlockStorage::operator==( const StructuredBlockStorage& rh
 
 inline const CellInterval& StructuredBlockStorage::getDomainCellBB( const uint_t level ) const {
 
-   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() );
+   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() )
 
    return domainCellBB_[ level ];
 }
@@ -497,7 +497,7 @@ inline const CellInterval& StructuredBlockStorage::getDomainCellBB( const uint_t
 
 inline uint_t StructuredBlockStorage::getNumberOfXCells( const uint_t level ) const {
 
-   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() );
+   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() )
 
    return uint_c( domainCellBB_[ level ].xMax() + 1 );
 }
@@ -506,7 +506,7 @@ inline uint_t StructuredBlockStorage::getNumberOfXCells( const uint_t level ) co
 
 inline uint_t StructuredBlockStorage::getNumberOfYCells( const uint_t level ) const {
 
-   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() );
+   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() )
 
    return uint_c( domainCellBB_[ level ].yMax() + 1 );
 }
@@ -515,7 +515,7 @@ inline uint_t StructuredBlockStorage::getNumberOfYCells( const uint_t level ) co
 
 inline uint_t StructuredBlockStorage::getNumberOfZCells( const uint_t level ) const {
 
-   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() );
+   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() )
 
    return uint_c( domainCellBB_[ level ].zMax() + 1 );
 }
@@ -524,8 +524,8 @@ inline uint_t StructuredBlockStorage::getNumberOfZCells( const uint_t level ) co
 
 inline uint_t StructuredBlockStorage::getNumberOfCells( const uint_t index, const uint_t level ) const {
 
-   WALBERLA_ASSERT_LESS( index, uint_t(3) );
-   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() );
+   WALBERLA_ASSERT_LESS( index, uint_t(3) )
+   WALBERLA_ASSERT_LESS( level, domainCellBB_.size() )
 
    return uint_c( domainCellBB_[ level ].max()[ index ] + 1 );
 }
@@ -689,7 +689,9 @@ inline AABB StructuredBlockStorage::getAABBFromCellBB( const CellInterval& cellB
 //**********************************************************************************************************************
 inline const IBlock* StructuredBlockStorage::getBlock( const Cell& cell, const uint_t level ) const {
 
-   real_t x, y, z;
+   real_t x;
+   real_t y;
+   real_t z;
    getCellCenter( x, y, z, cell, level );
 
    const IBlock* block = blockStorage_->getBlock(x,y,z);
@@ -712,7 +714,9 @@ inline const IBlock* StructuredBlockStorage::getBlock( const Cell& cell, const u
 //**********************************************************************************************************************
 inline IBlock* StructuredBlockStorage::getBlock( const Cell& cell, const uint_t level ) {
 
-   real_t x, y, z;
+   real_t x;
+   real_t y;
+   real_t z;
    getCellCenter( x, y, z, cell, level );
 
    IBlock* block = blockStorage_->getBlock(x,y,z);
@@ -736,8 +740,8 @@ inline IBlock* StructuredBlockStorage::getBlock( const Cell& cell, const uint_t
 //**********************************************************************************************************************
 inline const CellInterval& StructuredBlockStorage::getBlockCellBB( const IBlock& block ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
-   WALBERLA_ASSERT( blockCellBBCreated_ );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
+   WALBERLA_ASSERT( blockCellBBCreated_ )
 
    return *(block.uncheckedFastGetData< CellInterval >( blockCellBBId_ ));
 }
@@ -769,12 +773,12 @@ inline Cell StructuredBlockStorage::getBlockLocalCell( const IBlock& block, cons
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::getBlockLocalCell( Cell& localCell, const IBlock& block, const real_t x, const real_t y, const real_t z ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const AABB & aabb  = block.getAABB();
    const uint_t level = getLevel( block );
 
-   WALBERLA_ASSERT_LESS( level, levels_ );
+   WALBERLA_ASSERT_LESS( level, levels_ )
 
    localCell.x() = cell_idx_c( std::floor( ( x - aabb.xMin() ) / dx( level ) ) );
    localCell.y() = cell_idx_c( std::floor( ( y - aabb.yMin() ) / dy( level ) ) );
@@ -805,12 +809,12 @@ inline Vector3< real_t > StructuredBlockStorage::getBlockLocalCellCenter( const
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::getBlockLocalCellCenter( const IBlock & block, const Cell & localCell, real_t & x, real_t & y, real_t & z ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const AABB & aabb  = block.getAABB();
    const uint_t level = getLevel( block );
 
-   WALBERLA_ASSERT_LESS( level, levels_ );
+   WALBERLA_ASSERT_LESS( level, levels_ )
 
    x = aabb.xMin() + ( real_c( localCell.x() ) + real_c(0.5) ) * dx( level );
    y = aabb.yMin() + ( real_c( localCell.y() ) + real_c(0.5) ) * dy( level );
@@ -842,12 +846,12 @@ inline AABB StructuredBlockStorage::getBlockLocalCellAABB( const IBlock & block,
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::getBlockLocalCellAABB( const IBlock & block, const Cell & localCell, AABB & aabb ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const AABB& blockAABB = block.getAABB();
    const uint_t level = getLevel( block );
 
-   WALBERLA_ASSERT_LESS( level, levels_ );
+   WALBERLA_ASSERT_LESS( level, levels_ )
 
    const real_t x = blockAABB.xMin() + real_c( localCell.x() ) * dx( level );
    const real_t y = blockAABB.yMin() + real_c( localCell.y() ) * dy( level );
@@ -866,7 +870,7 @@ inline void StructuredBlockStorage::getBlockLocalCellAABB( const IBlock & block,
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::transformGlobalToBlockLocal( Vector3<real_t> & local, const IBlock& block, const Vector3<real_t> & global ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const uint_t level = getLevel( block );
 
@@ -886,7 +890,7 @@ inline void StructuredBlockStorage::transformGlobalToBlockLocal( Vector3<real_t>
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::transformGlobalToBlockLocal( Vector3<real_t> & point, const IBlock& block ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const uint_t level = getLevel( block );
 
@@ -906,7 +910,7 @@ inline void StructuredBlockStorage::transformGlobalToBlockLocal( Vector3<real_t>
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::transformBlockLocalToGlobal( Vector3<real_t> & global, const IBlock& block, const Vector3<real_t> & local ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const uint_t level = getLevel( block );
 
@@ -927,7 +931,7 @@ inline void StructuredBlockStorage::transformBlockLocalToGlobal( Vector3<real_t>
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::transformBlockLocalToGlobal( Vector3<real_t> &  point, const IBlock& block ) const
 {
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const uint_t level = getLevel( block );
       
@@ -949,7 +953,7 @@ inline void StructuredBlockStorage::transformBlockLocalToGlobal( Vector3<real_t>
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::transformGlobalToBlockLocalCell( Cell& local, const IBlock& block, const Cell& global ) const {
 
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const CellInterval& cellBB = getBlockCellBB( block );
 
@@ -981,7 +985,7 @@ inline void StructuredBlockStorage::transformGlobalToBlockLocalCell( Cell& cell,
 //**********************************************************************************************************************
 inline void StructuredBlockStorage::transformBlockLocalToGlobalCell( Cell& global, const IBlock& block, const Cell& local ) const {
 
-   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) );
+   WALBERLA_ASSERT_EQUAL( blockStorage_.get(), &(block.getBlockStorage()) )
 
    const CellInterval& cellBB = getBlockCellBB( block );
 
diff --git a/src/field/AddToStorage.h b/src/field/AddToStorage.h
index 9477c4ff7c0b76e876b6e5fc0fe7da08eb36a7bc..d1ef11d921e4e305ada51f590a0a1035be5d6ec5 100644
--- a/src/field/AddToStorage.h
+++ b/src/field/AddToStorage.h
@@ -163,6 +163,24 @@ BlockDataID addToStorage( const shared_ptr< BlockStorage_T > & blocks,
 }
 
 
+template< typename GhostLayerField_T, typename BlockStorage_T >
+BlockDataID addToStorage( const shared_ptr< BlockStorage_T > & blocks,
+                         const std::string & identifier,
+                         const typename GhostLayerField_T::value_type & initValue,
+                         const Layout layout,
+                         const uint_t nrOfGhostLayers,
+                         const shared_ptr< field::FieldAllocator<typename GhostLayerField_T::value_type> > alloc)
+{
+   auto alwaysInitialize = false;
+   auto initFunction = std::function< void ( GhostLayerField_T * field, IBlock * const block ) >();
+   auto requiredSelectors = Set<SUID>::emptySet();
+   auto incompatibleSelectors = Set<SUID>::emptySet();
+   auto calculateSize = internal::defaultSize;
+
+   return internal::AddToStorage< GhostLayerField_T, BlockStorage_T >::add( blocks, identifier, initValue, layout, nrOfGhostLayers,
+                                                                            alwaysInitialize, initFunction, requiredSelectors, incompatibleSelectors, calculateSize, alloc );
+}
+
 
 template< typename GhostLayerField_T, typename BlockStorage_T >
 BlockDataID addToStorage( const shared_ptr< BlockStorage_T > & blocks,
@@ -173,9 +191,9 @@ BlockDataID addToStorage( const shared_ptr< BlockStorage_T > & blocks,
                           const bool alwaysInitialize,
                           const Set<SUID> & requiredSelectors, const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() )
 {
+   auto initFunction = std::function< void ( GhostLayerField_T * field, IBlock * const block ) >();
    return addToStorage< GhostLayerField_T >( blocks, identifier, initValue, layout, nrOfGhostLayers, alwaysInitialize,
-                                             std::function< void ( GhostLayerField_T * field, IBlock * const block ) >(),
-                                             requiredSelectors, incompatibleSelectors );
+                                             initFunction, requiredSelectors, incompatibleSelectors );
 }
 
 
@@ -210,9 +228,9 @@ BlockDataID addToStorage( const shared_ptr< BlockStorage_T > & blocks,
                           const bool alwaysInitialize,
                           const Set<SUID> & requiredSelectors, const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() )
 {
+   auto initFunction = std::function< void ( GhostLayerField_T * field, IBlock * const block ) >();
    return addToStorage< GhostLayerField_T >( blocks, identifier, initValue, layout, nrOfGhostLayers, alwaysInitialize,
-                                             std::function< void ( GhostLayerField_T * field, IBlock * const block ) >(),
-                                             requiredSelectors, incompatibleSelectors, calculateSize );
+                                             initFunction, requiredSelectors, incompatibleSelectors, calculateSize );
 }
 
 
diff --git a/src/field/GhostLayerField.h b/src/field/GhostLayerField.h
index f5fe40fe57e18e042c61c74f126c7d6792ebe11c..345d497cdc4c04a789d46d391b909f3fb0322aa5 100644
--- a/src/field/GhostLayerField.h
+++ b/src/field/GhostLayerField.h
@@ -202,6 +202,20 @@ namespace field {
       //@}
       //****************************************************************************************************************
 
+      //** TimestepInformation *****************************************************************************************
+      /*! \name TimestepCounter */
+      //@{
+      inline uint8_t advanceTimestep()
+      {
+         timestepCounter_ = (timestepCounter_ + 1) & 1;
+         return timestepCounter_;
+      }
+      inline uint8_t getTimestep() const { return timestepCounter_; }
+      inline uint8_t getTimestepPlusOne() const { return (timestepCounter_ + 1) & 1; }
+      inline bool isEvenTimeStep() const {return (((timestepCounter_) &1) ^ 1); }
+      //@}
+      //****************************************************************************************************************
+
    protected:
       GhostLayerField( );
 
@@ -221,6 +235,8 @@ namespace field {
 
       template <typename T2, uint_t fSize2>
       friend class GhostLayerField;
+
+      uint8_t timestepCounter_;
    };
 
 } // namespace field
diff --git a/src/field/GhostLayerField.impl.h b/src/field/GhostLayerField.impl.h
index d594274e78e27034b7a3d495e02a49dcaf8da28c..1a8b758ca7a9f717ed0786c4edbb7ed24195a410 100644
--- a/src/field/GhostLayerField.impl.h
+++ b/src/field/GhostLayerField.impl.h
@@ -44,7 +44,7 @@ namespace field {
     *******************************************************************************************************************/
    template<typename T, uint_t fSize_>
    GhostLayerField<T,fSize_>::GhostLayerField( )
-      : gl_(0)
+      : gl_(0), timestepCounter_(0)
    {
    }
 
@@ -128,6 +128,7 @@ namespace field {
                                           const Layout & l, const shared_ptr<FieldAllocator<T> > &alloc)
     {
        gl_ = gl;
+       timestepCounter_ = uint8_c(0);
        uint_t innerGhostLayerSize = ( l == fzyx ) ? gl : uint_t(0);
        Field<T,fSize_>::init( _xSize + 2*gl ,
                               _ySize + 2*gl,
@@ -689,7 +690,7 @@ namespace field {
    template<typename T, uint_t fSize_>
    GhostLayerField<T,fSize_>::GhostLayerField(const GhostLayerField<T,fSize_> & other)
       : Field<T,fSize_>::Field(other),
-        gl_( other.gl_ )
+        gl_( other.gl_ ), timestepCounter_( other.timestepCounter_ )
    {
    }
 
@@ -700,7 +701,7 @@ namespace field {
    template <typename T2, uint_t fSize2>
    GhostLayerField<T,fSize_>::GhostLayerField(const GhostLayerField<T2,fSize2> & other)
       : Field<T,fSize_>::Field(other),
-        gl_( other.gl_ )
+        gl_( other.gl_ ), timestepCounter_( other.timestepCounter_ )
    {
    }
 
@@ -756,11 +757,11 @@ namespace field {
 
       // Assert that there is still space for ghost-layers after slicing
 
-      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->xOff()), gl_ );
+      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->xOff()), gl_ )
       WALBERLA_ASSERT_GREATER_EQUAL( this->xAllocSize() - uint_c(this->xOff()) - this->xSize(), gl_ );
-      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->yOff()), gl_ );
+      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->yOff()), gl_ )
       WALBERLA_ASSERT_GREATER_EQUAL( this->yAllocSize() - uint_c(this->yOff()) - this->ySize(), gl_ );
-      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->zOff()), gl_ );
+      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->zOff()), gl_ )
       WALBERLA_ASSERT_GREATER_EQUAL( this->zAllocSize() - uint_c(this->zOff()) - this->zSize(), gl_ );
 
    }
@@ -771,11 +772,11 @@ namespace field {
       Field<T,fSize_>::shiftCoordinates( cx, cy, cz );
 
       // Assert that there is still space for ghost-layers after slicing
-      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->xOff()), gl_ );
+      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->xOff()), gl_ )
       WALBERLA_ASSERT_GREATER_EQUAL( this->xAllocSize() - uint_c(this->xOff()) - this->xSize(), gl_ );
-      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->yOff()), gl_ );
+      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->yOff()), gl_ )
       WALBERLA_ASSERT_GREATER_EQUAL( this->yAllocSize() - uint_c(this->yOff()) - this->ySize(), gl_ );
-      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->zOff()), gl_ );
+      WALBERLA_ASSERT_GREATER_EQUAL( uint_c(this->zOff()), gl_ )
       WALBERLA_ASSERT_GREATER_EQUAL( this->zAllocSize() - uint_c(this->zOff()) - this->zSize(), gl_ );
    }
 
diff --git a/src/field/blockforest/BlockDataHandling.h b/src/field/blockforest/BlockDataHandling.h
index c01306d4079bb68e2d59c5c3ae516947808b6575..5113b895a881e3c7ba29c4f541c4801a4d6f6dfe 100644
--- a/src/field/blockforest/BlockDataHandling.h
+++ b/src/field/blockforest/BlockDataHandling.h
@@ -92,12 +92,12 @@ protected:
 
    void sizeCheck( const uint_t xSize, const uint_t ySize, const uint_t zSize )
    {
-      WALBERLA_CHECK( (xSize & uint_t(1)) == uint_t(0), "The x-size of your field must be divisible by 2." );
-      WALBERLA_CHECK( (ySize & uint_t(1)) == uint_t(0), "The y-size of your field must be divisible by 2." );
+      WALBERLA_CHECK( (xSize & uint_t(1)) == uint_t(0), "The x-size of your field must be divisible by 2." )
+      WALBERLA_CHECK( (ySize & uint_t(1)) == uint_t(0), "The y-size of your field must be divisible by 2." )
       if( Pseudo2D )
-      { WALBERLA_CHECK( zSize == uint_t(1), "The z-size of your field must be equal to 1 (pseudo 2D mode)." ); }
+      { WALBERLA_CHECK( zSize == uint_t(1), "The z-size of your field must be equal to 1 (pseudo 2D mode)." ) }
       else
-      { WALBERLA_CHECK( (zSize & uint_t(1)) == uint_t(0), "The z-size of your field must be divisible by 2." ); }
+      { WALBERLA_CHECK( (zSize & uint_t(1)) == uint_t(0), "The z-size of your field must be divisible by 2." ) }
    }
    
    InitializationFunction_T initFunction_;
@@ -110,7 +110,7 @@ template< typename Field_T, bool Pseudo2D >
 inline void BlockDataHandling< Field_T, Pseudo2D >::serialize( IBlock * const block, const BlockDataID & id, mpi::SendBuffer & buffer )
 {
    Field_T * field = block->template getData< Field_T >(id);
-   WALBERLA_ASSERT_NOT_NULLPTR( field );
+   WALBERLA_ASSERT_NOT_NULLPTR( field )
 
 #ifndef NDEBUG
    buffer << field->xSize() << field->ySize() << field->zSize() << field->fSize();
@@ -126,7 +126,7 @@ template< typename Field_T, bool Pseudo2D >
 void BlockDataHandling< Field_T, Pseudo2D >::serializeCoarseToFine( Block * const block, const BlockDataID & id, mpi::SendBuffer & buffer, const uint_t child )
 {
    Field_T * field = block->template getData< Field_T >(id);
-   WALBERLA_ASSERT_NOT_NULLPTR( field );
+   WALBERLA_ASSERT_NOT_NULLPTR( field )
 
    const uint_t xSize = field->xSize();
    const uint_t ySize = field->ySize();
@@ -161,7 +161,7 @@ template< typename Field_T, bool Pseudo2D >
 void BlockDataHandling< Field_T, Pseudo2D >::serializeFineToCoarse( Block * const block, const BlockDataID & id, mpi::SendBuffer & buffer )
 {
    Field_T * field = block->template getData< Field_T >(id);
-   WALBERLA_ASSERT_NOT_NULLPTR( field );
+   WALBERLA_ASSERT_NOT_NULLPTR( field )
 
    const uint_t xSize = field->xSize();
    const uint_t ySize = field->ySize();
@@ -210,10 +210,10 @@ inline void BlockDataHandling< Field_T, Pseudo2D >::deserialize( IBlock * const
    uint_t zSender( uint_t(0) );
    uint_t fSender( uint_t(0) );
    buffer >> xSender >> ySender >> zSender >> fSender;
-   WALBERLA_ASSERT_EQUAL( xSender, field->xSize() );
-   WALBERLA_ASSERT_EQUAL( ySender, field->ySize() );
-   WALBERLA_ASSERT_EQUAL( zSender, field->zSize() );
-   WALBERLA_ASSERT_EQUAL( fSender, field->fSize() );
+   WALBERLA_ASSERT_EQUAL( xSender, field->xSize() )
+   WALBERLA_ASSERT_EQUAL( ySender, field->ySize() )
+   WALBERLA_ASSERT_EQUAL( zSender, field->zSize() )
+   WALBERLA_ASSERT_EQUAL( fSender, field->fSize() )
 #endif
 
    for( auto it = field->begin(); it != field->end(); ++it )
@@ -240,14 +240,14 @@ void BlockDataHandling< Field_T, Pseudo2D >::deserializeCoarseToFine( Block * co
    uint_t zSender( uint_t(0) );
    uint_t fSender( uint_t(0) );
    buffer >> branchId >> xSender >> ySender >> zSender >> fSender;
-   WALBERLA_ASSERT_EQUAL( branchId, block->getId().getBranchId() );
-   WALBERLA_ASSERT_EQUAL( xSender, xSize / uint_t(2) );
-   WALBERLA_ASSERT_EQUAL( ySender, ySize / uint_t(2) );
+   WALBERLA_ASSERT_EQUAL( branchId, block->getId().getBranchId() )
+   WALBERLA_ASSERT_EQUAL( xSender, xSize / uint_t(2) )
+   WALBERLA_ASSERT_EQUAL( ySender, ySize / uint_t(2) )
    if( Pseudo2D )
-   { WALBERLA_ASSERT_EQUAL( zSender, zSize ); }
+   { WALBERLA_ASSERT_EQUAL( zSender, zSize ) }
    else
-   { WALBERLA_ASSERT_EQUAL( zSender, zSize / uint_t(2) ); }
-   WALBERLA_ASSERT_EQUAL( fSender, fSize );
+   { WALBERLA_ASSERT_EQUAL( zSender, zSize / uint_t(2) ) }
+   WALBERLA_ASSERT_EQUAL( fSender, fSize )
 #endif
 
    for( cell_idx_t z = cell_idx_t(0); z < cell_idx_c( zSize ); z += cell_idx_t(2) ) {
@@ -295,14 +295,14 @@ void BlockDataHandling< Field_T, Pseudo2D >::deserializeFineToCoarse( Block * co
    uint_t zSender( uint_t(0) );
    uint_t fSender( uint_t(0) );
    buffer >> branchId >> xSender >> ySender >> zSender >> fSender;
-   WALBERLA_ASSERT_EQUAL( branchId, child );
-   WALBERLA_ASSERT_EQUAL( xSender, xSize / uint_t(2) );
-   WALBERLA_ASSERT_EQUAL( ySender, ySize / uint_t(2) );
+   WALBERLA_ASSERT_EQUAL( branchId, child )
+   WALBERLA_ASSERT_EQUAL( xSender, xSize / uint_t(2) )
+   WALBERLA_ASSERT_EQUAL( ySender, ySize / uint_t(2) )
    if( Pseudo2D )
-   { WALBERLA_ASSERT_EQUAL( zSender, zSize ); }
+   { WALBERLA_ASSERT_EQUAL( zSender, zSize ) }
    else
-   { WALBERLA_ASSERT_EQUAL( zSender, zSize / uint_t(2) ); }
-   WALBERLA_ASSERT_EQUAL( fSender, fSize );
+   { WALBERLA_ASSERT_EQUAL( zSender, zSize / uint_t(2) ) }
+   WALBERLA_ASSERT_EQUAL( fSender, fSize )
 #endif
 
    const cell_idx_t zBegin = Pseudo2D ? cell_idx_t(0) : ( (child & uint_t(4)) ? ( cell_idx_c( zSize ) / cell_idx_t(2) ) : cell_idx_t(0) );
@@ -437,7 +437,7 @@ protected:
    GhostLayerField_T * allocate( IBlock * const block ) override
    {
       auto blocks = blocks_.lock();
-      WALBERLA_CHECK_NOT_NULLPTR( blocks, "Trying to access 'DefaultBlockDataHandling' for a block storage object that doesn't exist anymore" );
+      WALBERLA_CHECK_NOT_NULLPTR( blocks, "Trying to access 'DefaultBlockDataHandling' for a block storage object that doesn't exist anymore" )
       const Vector3< uint_t > size = calculateSize_( blocks, block );
       return internal::allocate< GhostLayerField_T >( size[0], size[1], size[2],
                                                       nrOfGhostLayers_, initValue_, layout_, alloc_ );
diff --git a/src/field/communication/StencilRestrictedPackInfo.h b/src/field/communication/StencilRestrictedPackInfo.h
index b82050340a359f0d8c91bb96343a4f0198116c7d..3ad7f9da2fcf231d2548b4dd2bdafd060269bec8 100644
--- a/src/field/communication/StencilRestrictedPackInfo.h
+++ b/src/field/communication/StencilRestrictedPackInfo.h
@@ -73,10 +73,10 @@ void StencilRestrictedPackInfo<GhostLayerField_T, Stencil>::unpackData( IBlock *
       return;
 
    GhostLayerField_T * pdfField = receiver->getData< GhostLayerField_T >( fieldId_ );
-   WALBERLA_ASSERT_NOT_NULLPTR( pdfField );
-   WALBERLA_ASSERT_EQUAL( pdfField->nrOfGhostLayers(), 1 );
+   WALBERLA_ASSERT_NOT_NULLPTR( pdfField )
+   WALBERLA_ASSERT_EQUAL( pdfField->nrOfGhostLayers(), 1 )
 
-   stencil::Direction packerDirection = stencil::inverseDir[dir];
+   stencil::Direction const packerDirection = stencil::inverseDir[dir];
 
    for(auto i = pdfField->beginGhostLayerOnlyXYZ(dir); i != pdfField->end(); ++i )
       for(uint_t f = 0; f < Stencil::d_per_d_length[packerDirection]; ++f)
@@ -94,7 +94,7 @@ void StencilRestrictedPackInfo<GhostLayerField_T, Stencil>::communicateLocal( co
    const GhostLayerField_T * sf = sender  ->getData< GhostLayerField_T >( fieldId_ );
          GhostLayerField_T * rf = receiver->getData< GhostLayerField_T >( fieldId_ );
 
-   WALBERLA_ASSERT_EQUAL( sf->xyzSize(), rf->xyzSize() );
+   WALBERLA_ASSERT_EQUAL( sf->xyzSize(), rf->xyzSize() )
 
    typename GhostLayerField_T::const_iterator srcIter = sf->beginSliceBeforeGhostLayerXYZ(dir);
    typename GhostLayerField_T::iterator       dstIter = rf->beginGhostLayerOnlyXYZ(stencil::inverseDir[dir]);
@@ -107,8 +107,8 @@ void StencilRestrictedPackInfo<GhostLayerField_T, Stencil>::communicateLocal( co
       ++srcIter;
       ++dstIter;
    }
-   WALBERLA_ASSERT( srcIter == sf->end() );
-   WALBERLA_ASSERT( dstIter == rf->end() );
+   WALBERLA_ASSERT( srcIter == sf->end() )
+   WALBERLA_ASSERT( dstIter == rf->end() )
 }
 
 
@@ -120,8 +120,8 @@ void StencilRestrictedPackInfo<GhostLayerField_T, Stencil>::packDataImpl( const
       return;
 
    const GhostLayerField_T * pdfField = sender->getData< GhostLayerField_T >( fieldId_ );
-   WALBERLA_ASSERT_NOT_NULLPTR( pdfField );
-   WALBERLA_ASSERT_EQUAL( pdfField->nrOfGhostLayers(), 1 );
+   WALBERLA_ASSERT_NOT_NULLPTR( pdfField )
+   WALBERLA_ASSERT_EQUAL( pdfField->nrOfGhostLayers(), 1 )
 
    for( auto i = pdfField->beginSliceBeforeGhostLayerXYZ(dir); i != pdfField->end(); ++i )
       for(uint_t f = 0; f < Stencil::d_per_d_length[dir]; ++f)
diff --git a/src/geometry/InitBoundaryHandling.h b/src/geometry/InitBoundaryHandling.h
index fe6817d3ead08c2b7bb144f9564dd8ea8d03809d..bcea3de9305e52f37a43cb11dec6f8ea73e6ff49 100644
--- a/src/geometry/InitBoundaryHandling.h
+++ b/src/geometry/InitBoundaryHandling.h
@@ -141,6 +141,21 @@ void setNonBoundaryCellsToDomain( StructuredBlockStorage & blocks, BlockDataID f
    }
 }
 
+template<typename FlagField_T>
+void setNonBoundaryCellsToDomain( StructuredBlockStorage & blocks, BlockDataID flagFieldID,
+                                 field::FlagUID fluidFlagID, cell_idx_t numGhostLayers)
+{
+   for( auto blockIt = blocks.begin(); blockIt != blocks.end(); ++blockIt )
+   {
+      auto flagField = blockIt->template getData<FlagField_T>( flagFieldID );
+      auto fluidFlag = flagField->getOrRegisterFlag(fluidFlagID);
+      for( auto it = flagField->beginWithGhostLayerXYZ(numGhostLayers); it != flagField->end(); ++it )
+         if ( *it == 0 )
+            addFlag(it, fluidFlag);
+   }
+}
+
+
 
 } // namespace geometry
 } // namespace walberla
diff --git a/src/gpu/AddGPUFieldToStorage.impl.h b/src/gpu/AddGPUFieldToStorage.impl.h
index e016f93fb47c34f0073d87813c96506bf11f58f8..610b853265cf7b94ad5be81bb1bd9444ce2b008b 100644
--- a/src/gpu/AddGPUFieldToStorage.impl.h
+++ b/src/gpu/AddGPUFieldToStorage.impl.h
@@ -93,5 +93,3 @@ namespace gpu
 
 } // namespace gpu
 } // namespace walberla
-
-
diff --git a/src/gpu/CMakeLists.txt b/src/gpu/CMakeLists.txt
index 1790af12b470b84a9559de60931b88523956a694..a8e58ab49e46aac0914d9b5b8f482855f9b50d2a 100644
--- a/src/gpu/CMakeLists.txt
+++ b/src/gpu/CMakeLists.txt
@@ -35,7 +35,7 @@ target_sources( gpu
       FieldIndexing.impl.h
       Kernel.h
       ParallelStreams.h
-      CudaRAII.h
+      GPURAII.h
       DeviceSelectMPI.cpp
       )
 
diff --git a/src/gpu/ErrorChecking.h b/src/gpu/ErrorChecking.h
index 0d1316eccf17130e717824559a2f0e3051c834b2..a80ef03343712257efcfc512b2fa7062bbcab21c 100644
--- a/src/gpu/ErrorChecking.h
+++ b/src/gpu/ErrorChecking.h
@@ -28,16 +28,18 @@
 #include "gpu/GPUWrapper.h"
 
 namespace walberla {
-namespace gpu
-{
+namespace gpu {
 
 
 #define WALBERLA_GPU_CHECK(ans) { ::walberla::gpu::checkForError((ans), __FILE__, __LINE__); }
+#define WALBERLA_GPU_CHECK_LAST_ERROR() {::walberla::gpu::checkForLastError(__FILE__, __LINE__);}
 
 
 
 inline void checkForError( gpuError_t code, const std::string & callerPath, const int line )
 {
+   // CUDA functions often return an error code (if an error has occurred). This function converts the error code into a human-readable string.
+   // For general error checking use checkForLastError.
   if(code != gpuSuccess)
   {
     std::stringstream ss;
@@ -46,6 +48,21 @@ inline void checkForError( gpuError_t code, const std::string & callerPath, cons
   }
 }
 
+#ifndef NDEBUG
+inline void checkForLastError( const std::string & callerPath, const int line )
+{
+   // Forces an immediate check including a synchronization. This breaks the asynchrony/concurrency structure; thus, it is only executed in debug mode.
+   gpuError_t code = gpuGetLastError();
+   if(code != gpuSuccess)
+   {
+      std::stringstream ss;
+      ss << "CUDA Error: " << code << " " << cudaGetErrorName(code) << ": " << cudaGetErrorString( code );
+      Abort::instance()->abort( ss.str(), callerPath, line );
+   }
+}
+#else
+inline void checkForLastError( const std::string & /*callerPath*/, const int /*line*/ ){}
+#endif
 
 
 } // namespace gpu
diff --git a/src/gpu/GPUField.h b/src/gpu/GPUField.h
index a286b8dca2cca9a49d2a93455e865744efe649b5..ecc9ccc5b1dfff0468c676b1262247a9df36add9 100755
--- a/src/gpu/GPUField.h
+++ b/src/gpu/GPUField.h
@@ -125,9 +125,9 @@ namespace gpu
       bool operator==( const GPUField & other ) const;
 
       void getGhostRegion( stencil::Direction d, CellInterval & ci,
-                           cell_idx_t thickness, bool fullSlice ) const;
+                           cell_idx_t thickness, bool fullSlice = false ) const;
       void getSliceBeforeGhostLayer(stencil::Direction d, CellInterval & ci,
-                                    cell_idx_t thickness, bool fullSlice ) const
+                                    cell_idx_t thickness, bool fullSlice = false ) const
       {
          getSlice( d, ci, 0, thickness, fullSlice );
       }
@@ -140,6 +140,20 @@ namespace gpu
       T       * dataAt(cell_idx_t x, cell_idx_t y, cell_idx_t z, cell_idx_t f);
       const T * dataAt(cell_idx_t x, cell_idx_t y, cell_idx_t z, cell_idx_t f) const;
 
+      //** TimestepInformation *****************************************************************************************
+      /*! \name TimestepCounter */
+      //@{
+      inline uint8_t advanceTimestep()
+      {
+         timestepCounter_ = (timestepCounter_ + 1) & 1;
+         return timestepCounter_;
+      }
+      inline uint8_t getTimestep() const { return timestepCounter_; }
+      inline uint8_t getTimestepPlusOne() const { return (timestepCounter_ + 1) & 1; }
+      inline bool isEvenTimeStep() const {return (((timestepCounter_) &1) ^ 1); }
+      //@}
+      //****************************************************************************************************************
+
    protected:
       gpuPitchedPtr pitchedPtr_;
       uint_t         nrOfGhostLayers_;
@@ -152,6 +166,7 @@ namespace gpu
       uint_t         fAllocSize_;
       Layout         layout_;
       bool           usePitchedMem_;
+      uint8_t        timestepCounter_;
    };
 
 
diff --git a/src/gpu/GPUField.impl.h b/src/gpu/GPUField.impl.h
index 221440f5c953485f7bf45b30730980d36837cf3a..9c1242aa92dcecf30ff0a1520faf151723ce2fd1 100644
--- a/src/gpu/GPUField.impl.h
+++ b/src/gpu/GPUField.impl.h
@@ -34,7 +34,7 @@ GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSiz
                        uint_t _nrOfGhostLayers, const Layout & _layout, bool usePitchedMem )
    : nrOfGhostLayers_( _nrOfGhostLayers ),
      xSize_( _xSize), ySize_( _ySize ), zSize_( _zSize ), fSize_( _fSize ),
-     layout_( _layout ), usePitchedMem_( usePitchedMem )
+     layout_( _layout ), usePitchedMem_( usePitchedMem ), timestepCounter_(0)
 {
    gpuExtent extent;
    if ( layout_ == zyxf )
@@ -61,12 +61,13 @@ GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSiz
    }
    else
    {
-      pitchedPtr_ = make_gpuPitchedPtr( nullptr, extent.width, extent.width, extent.height );
+      pitchedPtr_ = make_gpuPitchedPtr(nullptr, extent.width, extent.width, extent.height );
       WALBERLA_GPU_CHECK ( gpuMalloc( &pitchedPtr_.ptr, extent.width * extent.height * extent.depth ) )
    }
 
-   // allocation size is stored in pitched pointer which stores the amount of padded region in bytes
-   // we keep track of the size in #elements
+   // allocation size is stored in pitched pointer
+   // pitched pointer stores the amount of padded region in bytes
+   // but we keep track of the size in #elements
    WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 )
    if ( layout_ == field::fzyx )
    {
diff --git a/src/gpu/CudaRAII.h b/src/gpu/GPURAII.h
similarity index 100%
rename from src/gpu/CudaRAII.h
rename to src/gpu/GPURAII.h
diff --git a/src/gpu/GPUWrapper.h b/src/gpu/GPUWrapper.h
index 1abbc22895d7b9d6284c0a2661bcb14ca1421aeb..48fcc2e1064ce32c525eed2c43896195cf059784 100644
--- a/src/gpu/GPUWrapper.h
+++ b/src/gpu/GPUWrapper.h
@@ -31,6 +31,7 @@
     #define gpuGetErrorName cudaGetErrorName
     #define gpuGetErrorString cudaGetErrorString
     #define gpuPeekAtLastError cudaPeekAtLastError
+    #define gpuGetLastError cudaGetLastError
 
     #define gpuMalloc cudaMalloc
     #define gpuMallocHost cudaMallocHost
@@ -87,6 +88,7 @@
     #define gpuGetErrorName hipGetErrorName
     #define gpuGetErrorString hipGetErrorString
     #define gpuPeekAtLastError hipPeekAtLastError
+    #define gpuGetLastError hipGetLastError
 
     #define gpuMalloc hipMalloc
     #define gpuMallocHost hipHostMalloc
diff --git a/src/gpu/HostFieldAllocator.h b/src/gpu/HostFieldAllocator.h
index 98892aebadc7fd1c4b03ffc38f16bd26631e4123..2b7311addf3a8a7dce8c5804d9bf3ecaee9a7501 100644
--- a/src/gpu/HostFieldAllocator.h
+++ b/src/gpu/HostFieldAllocator.h
@@ -36,7 +36,7 @@ namespace gpu
    * Allocator that allocates a CPU! field using gpuHostAlloc without padding
    *
    * Uses gpuHostAlloc for the allocation - which allocates page-locked memory that is faster to transfer to the GPU
-   * This allocator should be used for CPU fields that are often transfered to GPU and back
+   * This allocator should be used for CPU fields that are often transferred to GPU and back
    *
    * \ingroup gpu
    *
diff --git a/src/gpu/ParallelStreams.h b/src/gpu/ParallelStreams.h
index fd83932766abfe6d0d177a2a09995a4d98f3ff77..0eca060569adf0e404c510f7847d9348f535df0f 100644
--- a/src/gpu/ParallelStreams.h
+++ b/src/gpu/ParallelStreams.h
@@ -19,11 +19,11 @@
 //
 //======================================================================================================================
 #pragma once
-#include "gpu/ErrorChecking.h"
-#include "gpu/CudaRAII.h"
-
 #include <vector>
 
+#include "gpu/ErrorChecking.h"
+#include "gpu/GPURAII.h"
+
 namespace walberla {
 namespace gpu
 {
diff --git a/src/gpu/communication/CMakeLists.txt b/src/gpu/communication/CMakeLists.txt
index 98bbff2016d5b9d3dd6c26de2774888e1e0cc257..7b9c0cced315353be228779dc3e4dfc96764efc7 100644
--- a/src/gpu/communication/CMakeLists.txt
+++ b/src/gpu/communication/CMakeLists.txt
@@ -7,5 +7,7 @@ target_sources( gpu
     GPUPackInfo.h
     CustomMemoryBuffer.h
     UniformGPUScheme.h
-    GeneratedGPUPackInfo.h     
+    NonUniformGPUScheme.h
+    GeneratedGPUPackInfo.h
+    GeneratedNonUniformGPUPackInfo.h
     )
diff --git a/src/gpu/communication/CustomMemoryBuffer.h b/src/gpu/communication/CustomMemoryBuffer.h
index 26a6743f3ef4aacb436a5e08ceff69ed68241ef0..e01e873708d84788fcecfb33a83ab3616b07c752 100644
--- a/src/gpu/communication/CustomMemoryBuffer.h
+++ b/src/gpu/communication/CustomMemoryBuffer.h
@@ -62,7 +62,7 @@ namespace communication {
    class CustomMemoryBuffer
    {
    public:
-      typedef uint8_t ElementType;
+      using ElementType = uint8_t;
 
       explicit CustomMemoryBuffer();
       explicit CustomMemoryBuffer( std::size_t initSize );
@@ -74,6 +74,7 @@ namespace communication {
       inline std::size_t allocSize() const { return std::size_t(end_ - begin_); }
       inline std::size_t size() const { return std::size_t(cur_ - begin_); }
       ElementType *ptr() const { return begin_; }
+      ElementType *cur() const { return cur_; }
 
       inline void clear() { cur_ = begin_; }
 
diff --git a/src/gpu/communication/GPUPackInfo.h b/src/gpu/communication/GPUPackInfo.h
index 3922690445c7f11fb8375596a9f9f740c076727a..c34600f29b2219088c29b0d5ff2e9fb1dc4a1142 100644
--- a/src/gpu/communication/GPUPackInfo.h
+++ b/src/gpu/communication/GPUPackInfo.h
@@ -43,10 +43,7 @@
 #include "gpu/GPUWrapper.h"
 #include "gpu/communication/CustomMemoryBuffer.h"
 
-namespace walberla {
-namespace gpu
-{
-namespace communication {
+namespace walberla::gpu::communication {
 
 
 /**
@@ -299,8 +296,4 @@ uint_t GPUPackInfo<GPUField_T>::numberOfGhostLayersToCommunicate( const GPUField
    }
 }
 
-
-
-} // namespace communication
-} // namespace gpu
-} // namespace walberla
+} // namespace walberla::gpu::communication
diff --git a/src/gpu/communication/GeneratedGPUPackInfo.h b/src/gpu/communication/GeneratedGPUPackInfo.h
index 9ca4afb9c575446ea8734d0cbf302819b5160b05..f5f6c98b60b529045a1877a435fcacacb9359a95 100644
--- a/src/gpu/communication/GeneratedGPUPackInfo.h
+++ b/src/gpu/communication/GeneratedGPUPackInfo.h
@@ -19,28 +19,25 @@
 //
 //======================================================================================================================
 
-
 #pragma once
 #include "domain_decomposition/IBlock.h"
 
-#include "stencil/Directions.h"
-
 #include "gpu/GPUWrapper.h"
 
-namespace walberla {
-namespace gpu
-{
+#include "stencil/Directions.h"
 
+namespace walberla::gpu {
 
 class GeneratedGPUPackInfo
 {
 public:
+  GeneratedGPUPackInfo() = default;
+  virtual ~GeneratedGPUPackInfo() = default;
+
    virtual void pack  ( stencil::Direction dir, unsigned char *buffer, IBlock *block, gpuStream_t stream ) = 0;
+   virtual void communicateLocal  ( stencil::Direction dir, const IBlock *sender, IBlock *receiver, gpuStream_t stream ) = 0;
    virtual void unpack( stencil::Direction dir, unsigned char *buffer, IBlock *block, gpuStream_t stream ) = 0;
    virtual uint_t size( stencil::Direction dir, IBlock *block ) = 0;
 };
 
-
-
-} //namespace gpu
-} //namespace walberla
\ No newline at end of file
+} //namespace walberla::gpu
\ No newline at end of file
diff --git a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6b39d9b0fe0dd1b9c90c5d63eb7b8ca00bd3d0f
--- /dev/null
+++ b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
@@ -0,0 +1,159 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GeneratedNonUniformGPUPackInfo.h
+//! \ingroup gpu
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/Block.h"
+#include "blockforest/BlockID.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/CustomMemoryBuffer.h"
+
+#include "stencil/Directions.h"
+
+using GpuBuffer_T = walberla::gpu::communication::GPUMemoryBuffer;
+
+
+namespace walberla::gpu {
+
+
+class GeneratedNonUniformGPUPackInfo
+{
+ public:
+   using VoidFunction                  = std::function< void( gpuStream_t) >;
+   GeneratedNonUniformGPUPackInfo() = default;
+   virtual ~GeneratedNonUniformGPUPackInfo() = default;
+
+   virtual bool constantDataExchange() const = 0;
+   virtual bool threadsafeReceiving() const = 0;
+
+   inline void packDataEqualLevel( const Block * sender, stencil::Direction dir, GpuBuffer_T & buffer) const;
+   virtual void unpackDataEqualLevel( Block * receiver, stencil::Direction dir, GpuBuffer_T & buffer) = 0;
+   virtual void communicateLocalEqualLevel( const Block * sender, Block * receiver, stencil::Direction dir, gpuStream_t stream) = 0;
+   virtual void getLocalEqualLevelCommFunction( std::vector< VoidFunction >& commFunctions, const Block * sender, Block * receiver, stencil::Direction dir) = 0;
+
+   inline  void packDataCoarseToFine        ( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer) const;
+   virtual void unpackDataCoarseToFine      (       Block * fineReceiver, const BlockID & coarseSender, stencil::Direction dir, GpuBuffer_T & buffer) = 0;
+   virtual void communicateLocalCoarseToFine( const Block * coarseSender, Block * fineReceiver, stencil::Direction dir ) = 0;
+   virtual void communicateLocalCoarseToFine( const Block * coarseSender, Block * fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream) = 0;
+   virtual void getLocalCoarseToFineCommFunction( std::vector< VoidFunction >& commFunctions, const Block * coarseSender, Block * fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer) = 0;
+
+   inline  void packDataFineToCoarse        ( const Block * fineSender,     const BlockID & coarseReceiver, stencil::Direction dir, GpuBuffer_T & buffer) const;
+   virtual void unpackDataFineToCoarse      (       Block * coarseReceiver, const BlockID & fineSender,     stencil::Direction dir, GpuBuffer_T & buffer) = 0;
+   virtual void communicateLocalFineToCoarse( const Block * fineSender, Block * coarseReceiver, stencil::Direction dir) = 0;
+   virtual void communicateLocalFineToCoarse( const Block * fineSender, Block * coarseReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream) = 0;
+   virtual void getLocalFineToCoarseCommFunction( std::vector< VoidFunction >& commFunctions, const Block * fineSender, Block * coarseReceiver, stencil::Direction dir, GpuBuffer_T & buffer) = 0;
+
+   virtual uint_t sizeEqualLevelSend( const Block * sender, stencil::Direction dir) = 0;
+   virtual uint_t sizeCoarseToFineSend ( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir) = 0;
+   virtual uint_t sizeFineToCoarseSend ( const Block * fineSender, stencil::Direction dir) = 0;
+
+
+#ifndef NDEBUG
+   void clearBufferSizeCheckMap() { bufferSize_.clear(); }
+#endif
+
+ protected:
+   virtual void packDataEqualLevelImpl(const Block* sender, stencil::Direction dir, GpuBuffer_T & buffer) const = 0;
+   virtual void packDataCoarseToFineImpl(const Block* coarseSender, const BlockID& fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer) const = 0;
+   virtual void packDataFineToCoarseImpl(const Block* fineSender, const BlockID& coarseReceiver, stencil::Direction dir, GpuBuffer_T & buffer) const = 0;
+
+#ifndef NDEBUG
+   mutable std::map< const Block *, std::map< stencil::Direction, std::map< uint_t, size_t > > > bufferSize_;
+#endif
+
+};
+
+inline void GeneratedNonUniformGPUPackInfo::packDataEqualLevel( const Block * sender, stencil::Direction dir, GpuBuffer_T & buffer ) const
+{
+#ifndef NDEBUG
+   size_t const sizeBefore = buffer.size();
+#endif
+
+   packDataEqualLevelImpl( sender, dir, buffer );
+
+#ifndef NDEBUG
+size_t const sizeAfter = buffer.size();
+if( constantDataExchange() )
+{
+      auto & blockMap = bufferSize_[ sender ];
+      auto & sizeMap  = blockMap[ dir ];
+      auto dirEntry = sizeMap.find( uint_t(0) );
+      if( dirEntry == sizeMap.end() )
+         sizeMap[ uint_t(0) ] = sizeAfter - sizeBefore;
+      else
+         WALBERLA_ASSERT_EQUAL( sizeMap[ uint_t(0) ], (sizeAfter - sizeBefore) )
+}
+#endif
+}
+
+
+
+inline void GeneratedNonUniformGPUPackInfo::packDataCoarseToFine( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer ) const
+{
+#ifndef NDEBUG
+   size_t const sizeBefore = buffer.size();
+#endif
+
+   packDataCoarseToFineImpl( coarseSender, fineReceiver, dir, buffer );
+
+#ifndef NDEBUG
+size_t const sizeAfter = buffer.size();
+if( constantDataExchange() )
+{
+      auto & blockMap = bufferSize_[ coarseSender ];
+      auto & sizeMap  = blockMap[ dir ];
+      auto dirEntry = sizeMap.find( fineReceiver.getBranchId() );
+      if( dirEntry == sizeMap.end() )
+         sizeMap[ fineReceiver.getBranchId() ] = sizeAfter - sizeBefore;
+      else
+         WALBERLA_ASSERT_EQUAL( sizeMap[ fineReceiver.getBranchId() ], (sizeAfter - sizeBefore) )
+}
+#endif
+}
+
+
+
+inline void GeneratedNonUniformGPUPackInfo::packDataFineToCoarse( const Block * fineSender, const BlockID & coarseReceiver, stencil::Direction dir, GpuBuffer_T & buffer ) const
+{
+#ifndef NDEBUG
+   size_t const sizeBefore = buffer.size();
+#endif
+
+   packDataFineToCoarseImpl( fineSender, coarseReceiver, dir, buffer );
+
+#ifndef NDEBUG
+size_t const sizeAfter = buffer.size();
+if( constantDataExchange() )
+{
+      auto & blockMap = bufferSize_[ fineSender ];
+      auto & sizeMap  = blockMap[ dir ];
+      auto dirEntry = sizeMap.find( uint_t(0) );
+      if( dirEntry == sizeMap.end() )
+         sizeMap[ uint_t(0) ] = sizeAfter - sizeBefore;
+      else
+         WALBERLA_ASSERT_EQUAL( sizeMap[ uint_t(0) ], (sizeAfter - sizeBefore) )
+}
+#endif
+}
+
+
+} //namespace walberla::gpu
\ No newline at end of file
diff --git a/src/gpu/communication/MemcpyPackInfo.h b/src/gpu/communication/MemcpyPackInfo.h
index c5e58d2a395d71337409f9e8da4b5e039a79dd6c..6c15988f4f2687275fea7f0f8be36b2e7d99fcf6 100644
--- a/src/gpu/communication/MemcpyPackInfo.h
+++ b/src/gpu/communication/MemcpyPackInfo.h
@@ -11,20 +11,17 @@
 #include "gpu/GPUWrapper.h"
 #include "gpu/communication/GeneratedGPUPackInfo.h"
 
-namespace walberla {
-namespace gpu
-{
-namespace communication {
+namespace walberla::gpu::communication {
 
 template<typename GPUFieldType>
 class MemcpyPackInfo : public ::walberla::gpu::GeneratedGPUPackInfo
 {
 public:
-    MemcpyPackInfo( BlockDataID pdfsID_ )
-        : pdfsID(pdfsID_) {};
-    virtual ~MemcpyPackInfo() = default;
+    MemcpyPackInfo( BlockDataID pdfsID_ ) : pdfsID(pdfsID_) {};
+    ~MemcpyPackInfo() override = default;
 
     void pack  (stencil::Direction dir, unsigned char * buffer, IBlock * block, gpuStream_t stream) override;
+    void communicateLocal  ( stencil::Direction dir, const IBlock *sender, IBlock *receiver, gpuStream_t stream ) override;
     void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, gpuStream_t stream) override;
     uint_t size(stencil::Direction dir, IBlock * block) override;
 
@@ -36,8 +33,6 @@ private:
     uint_t numberOfGhostLayersToCommunicate( const GPUFieldType * const field ) const;
 };
 
-} // namespace communication
-} // namespace gpu
-} // namespace walberla
+} // namespace walberla::gpu::communication
 
 #include "MemcpyPackInfo.impl.h"
diff --git a/src/gpu/communication/MemcpyPackInfo.impl.h b/src/gpu/communication/MemcpyPackInfo.impl.h
index 486871d4e0e7563e8b890b91bfc5aa814775d74a..2110933cda5322828f40cc14b471be5c6a309bfe 100644
--- a/src/gpu/communication/MemcpyPackInfo.impl.h
+++ b/src/gpu/communication/MemcpyPackInfo.impl.h
@@ -23,7 +23,7 @@ void MemcpyPackInfo< GPUFieldType >::pack(stencil::Direction dir, unsigned char
    WALBERLA_ASSERT_NOT_NULLPTR( fieldPtr )
    // 
    cell_idx_t nrOfGhostLayers = cell_idx_c( numberOfGhostLayersToCommunicate( fieldPtr ) );
-   CellInterval fieldCi = field::getGhostRegion( *fieldPtr, dir, nrOfGhostLayers, false );
+   CellInterval fieldCi = field::getSliceBeforeGhostLayer( *fieldPtr, dir, nrOfGhostLayers, false );
 
    // Base offsets into the buffer and GPUField, respectively
    auto dstOffset = std::make_tuple( uint_c(0), uint_c(0), uint_c(0), uint_c(0) );
@@ -65,6 +65,65 @@ void MemcpyPackInfo< GPUFieldType >::pack(stencil::Direction dir, unsigned char
    }
 }
 
+template<typename GPUFieldType>
+void MemcpyPackInfo< GPUFieldType >::communicateLocal( stencil::Direction dir, const IBlock* sender, IBlock* receiver, gpuStream_t stream )
+{
+   // WALBERLA_ABORT("The MemcpyPackInfo does not provide a thread-safe local communication. Thus it cannot be used in local mode. To use it, set useLocalCommunication to false in the communication scheme")
+
+
+   // Extract field data pointer from the block
+   const GPUFieldType * senderFieldPtr = sender->getData< GPUFieldType >( pdfsID );
+   const GPUFieldType * receiverFieldPtr = receiver->getData< GPUFieldType >( pdfsID );
+   WALBERLA_ASSERT_NOT_NULLPTR( senderFieldPtr )
+   WALBERLA_ASSERT_NOT_NULLPTR( receiverFieldPtr )
+
+   //
+   cell_idx_t nrOfGhostLayers = cell_idx_c( numberOfGhostLayersToCommunicate( senderFieldPtr ) );
+   WALBERLA_ASSERT_EQUAL(nrOfGhostLayers, cell_idx_c( numberOfGhostLayersToCommunicate( receiverFieldPtr )))
+   WALBERLA_ASSERT_EQUAL(senderFieldPtr->layout(), receiverFieldPtr->layout() )
+   WALBERLA_ASSERT_EQUAL(senderFieldPtr->fSize(), receiverFieldPtr->fSize() )
+
+   CellInterval senderCi = field::getSliceBeforeGhostLayer( *senderFieldPtr, dir, nrOfGhostLayers, false );
+   CellInterval receiverCi = field::getGhostRegion( *receiverFieldPtr, stencil::inverseDir[dir], nrOfGhostLayers, false );
+
+   // Base offsets into the buffer and GPUField, respectively
+   auto srcOffset = std::make_tuple( uint_c(senderCi.xMin() + nrOfGhostLayers),
+                                     uint_c(senderCi.yMin() + nrOfGhostLayers),
+                                     uint_c(senderCi.zMin() + nrOfGhostLayers),
+                                     uint_c(0) );
+
+   auto dstOffset = std::make_tuple( uint_c(receiverCi.xMin() + nrOfGhostLayers),
+                                     uint_c(receiverCi.yMin() + nrOfGhostLayers),
+                                     uint_c(receiverCi.zMin() + nrOfGhostLayers),
+                                     uint_c(0) );
+
+
+   // Size of data to pack, in terms of elements of the field
+   auto intervalSize = std::make_tuple( senderCi.xSize(), senderCi.ySize(),
+                                        senderCi.zSize(), senderFieldPtr->fSize() );
+
+   WALBERLA_ASSERT_EQUAL(intervalSize, std::make_tuple( receiverCi.xSize(), receiverCi.ySize(), receiverCi.zSize(), receiverFieldPtr->fSize() ))
+
+   if ( senderFieldPtr->layout() == field::fzyx )
+   {
+      const uint_t dstAllocSizeZ = receiverFieldPtr->zAllocSize();
+      const uint_t srcAllocSizeZ = senderFieldPtr->zAllocSize();
+
+      copyDevToDevFZYX( receiverFieldPtr->pitchedPtr(), senderFieldPtr->pitchedPtr(), dstOffset, srcOffset,
+                       dstAllocSizeZ, srcAllocSizeZ, sizeof(typename GPUFieldType::value_type),
+                       intervalSize, stream );
+   }
+   else
+   {
+      const uint_t dstAllocSizeZ = receiverFieldPtr->yAllocSize();
+      const uint_t srcAllocSizeZ = senderFieldPtr->yAllocSize();
+
+      copyDevToDevZYXF( receiverFieldPtr->pitchedPtr(), senderFieldPtr->pitchedPtr(), dstOffset, srcOffset,
+                       dstAllocSizeZ, srcAllocSizeZ, sizeof(typename GPUFieldType::value_type),
+                       intervalSize, stream );
+   }
+}
+
 template<typename GPUFieldType>
 void MemcpyPackInfo< GPUFieldType >::unpack(stencil::Direction dir, unsigned char * byte_buffer,
                                             IBlock * block, gpuStream_t stream)
@@ -75,7 +134,6 @@ void MemcpyPackInfo< GPUFieldType >::unpack(stencil::Direction dir, unsigned cha
    cell_idx_t nrOfGhostLayers = cell_idx_c( numberOfGhostLayersToCommunicate( fieldPtr ) );
 
    CellInterval fieldCi = field::getGhostRegion( *fieldPtr, dir, nrOfGhostLayers, false );
-
    auto dstOffset = std::make_tuple( uint_c(fieldCi.xMin() + nrOfGhostLayers),
                                      uint_c(fieldCi.yMin() + nrOfGhostLayers),
                                      uint_c(fieldCi.zMin() + nrOfGhostLayers),
@@ -208,7 +266,7 @@ uint_t MemcpyPackInfo< GPUFieldType >::size(stencil::Direction dir, IBlock * blo
 
     return ci.numCells() * elementsPerCell * sizeof(typename GPUFieldType::value_type);
     */
-    uint_t totalCells = ci.xSize() * ci.ySize() * ci.zSize() * pdfs->fSize() * sizeof(typename GPUFieldType::value_type);
+    uint_t totalCells = ci.numCells() * pdfs->fSize() * sizeof(typename GPUFieldType::value_type);
     return totalCells;
 }
 
diff --git a/src/gpu/communication/NonUniformGPUScheme.h b/src/gpu/communication/NonUniformGPUScheme.h
new file mode 100644
index 0000000000000000000000000000000000000000..28a8fd0d0b7534e30957e938389f57d28d7ebeef
--- /dev/null
+++ b/src/gpu/communication/NonUniformGPUScheme.h
@@ -0,0 +1,961 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonUniformGPUScheme.h
+//! \ingroup gpu
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/mpi/BufferSystem.h"
+#include "core/mpi/MPIWrapper.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include <thread>
+
+#include "gpu/ErrorChecking.h"
+#include "gpu/GPURAII.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/ParallelStreams.h"
+#include "gpu/communication/CustomMemoryBuffer.h"
+#include "gpu/communication/GeneratedNonUniformGPUPackInfo.h"
+
+namespace walberla::gpu::communication
+{
+
+template< typename Stencil >
+class NonUniformGPUScheme
+{
+ public:
+   enum INDEX { EQUAL_LEVEL = 0, COARSE_TO_FINE = 1, FINE_TO_COARSE = 2 };
+
+   using CpuBuffer_T = walberla::gpu::communication::PinnedMemoryBuffer;
+   using GpuBuffer_T = walberla::gpu::communication::GPUMemoryBuffer;
+
+   explicit NonUniformGPUScheme(const weak_ptr< StructuredBlockForest >& bf, bool sendDirectlyFromGPU = false,
+                                const int tag = 5432);
+
+   explicit NonUniformGPUScheme(const weak_ptr< StructuredBlockForest >& bf, const Set< SUID >& requiredBlockSelectors,
+                                const Set< SUID >& incompatibleBlockSelectors, bool sendDirectlyFromGPU = false,
+                                const int tag = 5432);
+
+   ~NonUniformGPUScheme();
+
+   //** Pack Info Registration *****************************************************************************************
+   /*! \name Pack Info Registration */
+   //@{
+   void addPackInfo(const shared_ptr< GeneratedNonUniformGPUPackInfo >& pi);
+   //@}
+   //*******************************************************************************************************************
+
+   inline void communicateEqualLevel(const uint_t level);
+   inline void communicateCoarseToFine(const uint_t fineLevel);
+   inline void communicateFineToCoarse(const uint_t fineLevel);
+
+   std::function<void()>  communicateEqualLevelFunctor(const uint_t level) {
+      return [level, this](){ NonUniformGPUScheme::communicateEqualLevel(level);};
+   }
+   std::function<void()>  communicateCoarseToFineFunctor(const uint_t fineLevel) {
+      return [fineLevel, this](){ NonUniformGPUScheme::communicateCoarseToFine(fineLevel);};
+   }
+   std::function<void()>  communicateFineToCoarseFunctor(const uint_t fineLevel) {
+      return [fineLevel, this](){ NonUniformGPUScheme::communicateFineToCoarse(fineLevel);};
+   }
+
+   inline void startCommunicateEqualLevel(const uint_t level);
+   inline void startCommunicateCoarseToFine(const uint_t fineLevel);
+   inline void startCommunicateFineToCoarse(const uint_t fineLevel);
+
+   inline void waitCommunicateEqualLevel(const uint_t level);
+   inline void waitCommunicateCoarseToFine(const uint_t fineLevel);
+   inline void waitCommunicateFineToCoarse(const uint_t fineLevel);
+
+ private:
+   void setupCommunication();
+
+   void init();
+   void refresh();
+
+   bool isAnyCommunicationInProgress() const;
+
+   void startCommunicationEqualLevel(const uint_t index, std::set< uint_t >& participatingLevels);
+   void startCommunicationCoarseToFine(const uint_t index, const uint_t coarsestLevel);
+   void startCommunicationFineToCoarse(const uint_t index, const uint_t finestLevel);
+
+   weak_ptr< StructuredBlockForest > blockForest_;
+   uint_t forestModificationStamp_;
+
+   std::vector< std::vector< bool > > communicationInProgress_;
+   bool sendFromGPU_;
+   int baseTag_;
+
+   std::vector< std::vector< mpi::GenericBufferSystem< CpuBuffer_T, CpuBuffer_T > > > bufferSystemCPU_;
+   std::vector< std::vector< mpi::GenericBufferSystem< GpuBuffer_T, GpuBuffer_T > > > bufferSystemGPU_;
+   std::vector< std::vector< GpuBuffer_T > > localBuffer_;
+
+   std::vector< shared_ptr< GeneratedNonUniformGPUPackInfo > > packInfos_;
+
+   ParallelStreams parallelSectionManager_;
+
+   struct Header
+   {
+      BlockID receiverId;
+      BlockID senderId;
+      stencil::Direction dir;
+   };
+   std::vector< std::vector< std::map< mpi::MPIRank, std::vector< Header > > > > headers_;
+
+   Set< SUID > requiredBlockSelectors_;
+   Set< SUID > incompatibleBlockSelectors_;
+};
+
+template< typename Stencil >
+NonUniformGPUScheme< Stencil >::NonUniformGPUScheme(const weak_ptr< StructuredBlockForest >& bf, bool sendDirectlyFromGPU,
+                                                    const int tag)
+   : blockForest_(bf), sendFromGPU_(sendDirectlyFromGPU), baseTag_(tag), parallelSectionManager_(-1),
+     requiredBlockSelectors_(Set< SUID >::emptySet()), incompatibleBlockSelectors_(Set< SUID >::emptySet())
+{
+   init();
+}
+
+template< typename Stencil >
+NonUniformGPUScheme< Stencil >::NonUniformGPUScheme(const weak_ptr< StructuredBlockForest >& bf,
+                                                    const Set< SUID >& requiredBlockSelectors,
+                                                    const Set< SUID >& incompatibleBlockSelectors,
+                                                    bool sendDirectlyFromGPU, const int tag)
+   : blockForest_(bf), requiredBlockSelectors_(requiredBlockSelectors),
+     incompatibleBlockSelectors_(incompatibleBlockSelectors), sendFromGPU_(sendDirectlyFromGPU), baseTag_(tag),
+     parallelSectionManager_(-1)
+{
+   init();
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::init()
+{
+   bufferSystemCPU_.resize(3);
+   bufferSystemGPU_.resize(3);
+   localBuffer_.resize(3);
+   headers_.resize(3);
+
+   communicationInProgress_.resize(3);
+
+   refresh();
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::refresh()
+{
+   WALBERLA_ASSERT(!isAnyCommunicationInProgress())
+
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   const uint_t levels = forest->getNumberOfLevels();
+
+   for (uint_t i = 0; i != 3; ++i)
+   {
+      bufferSystemCPU_[i].clear();
+      bufferSystemGPU_[i].clear();
+      localBuffer_[i].clear();
+      headers_[i].clear();
+      headers_[i].resize(size_t(levels + uint_t(1)));
+
+      for (uint_t j = 0; j <= levels; ++j)
+      {
+         headers_[i][j].clear();
+         bufferSystemCPU_[i].emplace_back(
+            mpi::MPIManager::instance()->comm(), baseTag_ + int_c(i * levels + j));
+         bufferSystemGPU_[i].emplace_back(
+            mpi::MPIManager::instance()->comm(), baseTag_ + int_c(i * levels + j));
+         localBuffer_[i].emplace_back();
+      }
+
+      communicationInProgress_[i].resize(size_t(levels + uint_t(1)), false);
+   }
+
+#ifndef NDEBUG
+   for (auto p = packInfos_.begin(); p != packInfos_.end(); ++p)
+      (*p)->clearBufferSizeCheckMap();
+#endif
+
+   forestModificationStamp_ = forest->getBlockForest().getModificationStamp();
+}
+
+template< typename Stencil >
+inline void NonUniformGPUScheme< Stencil >::communicateEqualLevel(const uint_t level)
+{
+   startCommunicateEqualLevel(level);
+   waitCommunicateEqualLevel(level);
+}
+
+template< typename Stencil >
+inline void NonUniformGPUScheme< Stencil >::communicateCoarseToFine(const uint_t fineLevel)
+{
+   startCommunicateCoarseToFine(fineLevel);
+   waitCommunicateCoarseToFine(fineLevel);
+}
+
+template< typename Stencil >
+inline void NonUniformGPUScheme< Stencil >::communicateFineToCoarse(const uint_t fineLevel)
+{
+   startCommunicateFineToCoarse(fineLevel);
+   waitCommunicateFineToCoarse(fineLevel);
+}
+
+template< typename Stencil >
+inline void NonUniformGPUScheme< Stencil >::startCommunicateEqualLevel(const uint_t level)
+{
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   WALBERLA_ASSERT_LESS(level, forest->getNumberOfLevels())
+
+   if (forestModificationStamp_ != forest->getBlockForest().getModificationStamp()) refresh();
+
+   std::set< uint_t > participatingLevels;
+   participatingLevels.insert(level);
+
+   startCommunicationEqualLevel(level, participatingLevels);
+}
+
+template< typename Stencil >
+inline void NonUniformGPUScheme< Stencil >::startCommunicateCoarseToFine(const uint_t fineLevel)
+{
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   WALBERLA_ASSERT_GREATER(fineLevel, uint_t(0))
+   WALBERLA_ASSERT_LESS(fineLevel, forest->getNumberOfLevels())
+
+   if (forestModificationStamp_ != forest->getBlockForest().getModificationStamp()) refresh();
+
+   const uint_t coarsestLevel = fineLevel - uint_t(1);
+
+   startCommunicationCoarseToFine(fineLevel, coarsestLevel);
+}
+
+template< typename Stencil >
+inline void NonUniformGPUScheme< Stencil >::startCommunicateFineToCoarse(const uint_t fineLevel)
+{
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   WALBERLA_ASSERT_GREATER(fineLevel, uint_t(0))
+   WALBERLA_ASSERT_LESS(fineLevel, forest->getNumberOfLevels())
+
+   if (forestModificationStamp_ != forest->getBlockForest().getModificationStamp()) refresh();
+
+   const uint_t finestLevel   = fineLevel;
+
+   startCommunicationFineToCoarse(fineLevel, finestLevel);
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t index,
+                                                                  std::set< uint_t >& participatingLevels)
+{
+   // Starts the non-blocking exchange between equally sized (same-level)
+   // blocks: schedules MPI receives, packs the ghost-layer data of every
+   // registered pack info into per-rank send buffers (directly on the GPU,
+   // or staged through pinned CPU buffers when sendFromGPU_ is false) and
+   // finally posts the sends.
+   if (packInfos_.empty()) return;
+
+   WALBERLA_ASSERT(!communicationInProgress_[EQUAL_LEVEL][index])
+   communicationInProgress_[EQUAL_LEVEL][index] = true;
+
+   // NOTE(review): unlike the public start/wait functions, the lock() result
+   // is dereferenced below without a null check — verify callers guarantee
+   // the forest outlives the scheme here.
+   auto forest = blockForest_.lock();
+
+   // Schedule Receives
+   if (sendFromGPU_)
+      bufferSystemGPU_[EQUAL_LEVEL][index].scheduleReceives();
+   else
+      bufferSystemCPU_[EQUAL_LEVEL][index].scheduleReceives();
+
+   // When staging through the CPU, the GPU-side send buffers are still used
+   // as packing scratch space and must be reset before reuse.
+   if (!sendFromGPU_)
+      for (auto it : headers_[EQUAL_LEVEL][index])
+         bufferSystemGPU_[EQUAL_LEVEL][index].sendBuffer(it.first).clear();
+
+   // Start filling send buffers
+   {
+      for (auto& iBlock : *forest)
+      {
+         auto senderBlock = dynamic_cast< Block* >(&iBlock);
+
+         if (!selectable::isSetSelected(senderBlock->getState(), requiredBlockSelectors_, incompatibleBlockSelectors_))
+            continue;
+
+         if (participatingLevels.find(senderBlock->getLevel()) == participatingLevels.end())
+            continue;
+
+         for (auto dir = Stencil::beginNoCenter(); dir != Stencil::end(); ++dir)
+         {
+            const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir);
+
+            // Equal-level exchange only: skip sections without an equally
+            // sized neighbor (refinement boundaries are handled elsewhere).
+            if (!(senderBlock->neighborhoodSectionHasEquallySizedBlock(neighborIdx)))
+               continue;
+            WALBERLA_ASSERT_EQUAL(senderBlock->getNeighborhoodSectionSize(neighborIdx), uint_t(1))
+            if (!selectable::isSetSelected(senderBlock->getNeighborState(neighborIdx, uint_t(0)),requiredBlockSelectors_, incompatibleBlockSelectors_))
+               continue;
+
+            if( senderBlock->neighborExistsLocally( neighborIdx, uint_t(0) ) )
+            {
+               // Neighbor lives on this process: copy block-to-block without
+               // touching the MPI buffer systems.
+               auto receiverBlock = dynamic_cast< Block * >( forest->getBlock( senderBlock->getNeighborId( neighborIdx, uint_t(0) )) );
+               for (auto& pi : packInfos_)
+               {
+                  pi->communicateLocalEqualLevel(senderBlock, receiverBlock, *dir, nullptr);
+               }
+            }
+            else
+            {
+               // Remote neighbor: pack into the GPU send buffer of that rank.
+               auto nProcess              = mpi::MPIRank(senderBlock->getNeighborProcess(neighborIdx, uint_t(0)));
+               GpuBuffer_T& gpuDataBuffer = bufferSystemGPU_[EQUAL_LEVEL][index].sendBuffer(nProcess);
+
+               for (auto& pi : packInfos_)
+               {
+                  WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+                  WALBERLA_ASSERT_GREATER_EQUAL(gpuDataBuffer.allocSize() - gpuDataBuffer.size(), pi->sizeEqualLevelSend(senderBlock, *dir))
+
+                  pi->packDataEqualLevel(senderBlock, *dir, gpuDataBuffer);
+
+                  if (!sendFromGPU_)
+                  {
+                     // CPU sending path: asynchronously mirror the packed
+                     // segment into the pinned host buffer at the same offset.
+                     auto gpuDataPtr = gpuDataBuffer.cur();
+                     auto size = pi->sizeEqualLevelSend(senderBlock, *dir);
+                     auto cpuDataPtr = bufferSystemCPU_[EQUAL_LEVEL][index].sendBuffer(nProcess).advanceNoResize(size);
+                     WALBERLA_ASSERT_NOT_NULLPTR(cpuDataPtr)
+                     WALBERLA_GPU_CHECK(gpuMemcpyAsync(cpuDataPtr, gpuDataPtr, size, gpuMemcpyDeviceToHost))
+                  }
+               }
+            }
+         }
+      }
+   }
+
+   // wait for packing to finish
+   WALBERLA_GPU_CHECK( gpuDeviceSynchronize() )
+
+
+   if (sendFromGPU_)
+      bufferSystemGPU_[EQUAL_LEVEL][index].sendAll();
+   else
+      bufferSystemCPU_[EQUAL_LEVEL][index].sendAll();
+
+   // NOTE(review): redundant — the flag was already set at the top of this
+   // function; consider removing one of the two assignments.
+   communicationInProgress_[EQUAL_LEVEL][index] = true;
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::startCommunicationCoarseToFine(const uint_t index, const uint_t coarsestLevel)
+{
+   // Starts the non-blocking coarse-to-fine exchange across a refinement
+   // boundary. `index` selects the buffer set (indexed by the fine level),
+   // `coarsestLevel` is the level of the sending (coarse) blocks. Local
+   // neighbors are served through a shared device-side scratch buffer
+   // (localBuffer_), remote ones through the per-rank MPI buffer systems.
+   if (packInfos_.empty()) return;
+   WALBERLA_ASSERT(!communicationInProgress_[COARSE_TO_FINE][index])
+   communicationInProgress_[COARSE_TO_FINE][index] = true;
+
+   // NOTE(review): lock() result is used below without a null check.
+   auto forest = blockForest_.lock();
+
+   // Schedule Receives
+   if (sendFromGPU_)
+      bufferSystemGPU_[COARSE_TO_FINE][index].scheduleReceives();
+   else
+      bufferSystemCPU_[COARSE_TO_FINE][index].scheduleReceives();
+
+   // GPU buffers double as packing scratch space when sending from the CPU.
+   if (!sendFromGPU_)
+      for (auto it : headers_[COARSE_TO_FINE][index])
+         bufferSystemGPU_[COARSE_TO_FINE][index].sendBuffer(it.first).clear();
+
+   // Start filling send buffers
+   {
+      for (auto& iBlock : *forest)
+      {
+         auto coarseBlock = dynamic_cast< Block* >(&iBlock);
+         auto nLevel      = coarseBlock->getLevel();
+
+         if (!selectable::isSetSelected(coarseBlock->getState(), requiredBlockSelectors_, incompatibleBlockSelectors_))
+            continue;
+
+         // Only blocks on the coarse side of this refinement boundary send.
+         if (nLevel != coarsestLevel) continue;
+
+         for (auto dir = Stencil::beginNoCenter(); dir != Stencil::end(); ++dir)
+         {
+            const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir);
+
+            if (coarseBlock->getNeighborhoodSectionSize(neighborIdx) == uint_t(0)) continue;
+            if (!(coarseBlock->neighborhoodSectionHasSmallerBlocks(neighborIdx))) continue;
+
+            // A coarse block can face several finer neighbors in one section.
+            for (uint_t n = 0; n != coarseBlock->getNeighborhoodSectionSize(neighborIdx); ++n)
+            {
+               const BlockID& fineReceiverId = coarseBlock->getNeighborId(neighborIdx, n);
+               if (!selectable::isSetSelected(coarseBlock->getNeighborState(neighborIdx, n), requiredBlockSelectors_,
+                                              incompatibleBlockSelectors_))
+                  continue;
+
+               if( coarseBlock->neighborExistsLocally( neighborIdx, n ) )
+               {
+                  auto fineReceiverBlock = dynamic_cast< Block * >( forest->getBlock( fineReceiverId ) );
+                  //                  for (auto& pi : packInfos_)
+                  //                  {
+                  //                     pi->communicateLocalCoarseToFine(coarseBlock, fineReceiverBlock, *dir);
+                  //                  }
+
+                  // Local refinement exchange goes through the shared device
+                  // scratch buffer instead of the MPI buffer systems.
+                  GpuBuffer_T& gpuDataBuffer = localBuffer_[COARSE_TO_FINE][index];
+                  WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+
+                  for (auto& pi : packInfos_)
+                  {
+                     WALBERLA_ASSERT_GREATER_EQUAL(gpuDataBuffer.allocSize() - gpuDataBuffer.size(), pi->sizeCoarseToFineSend(coarseBlock, fineReceiverId, *dir))
+                     pi->communicateLocalCoarseToFine(coarseBlock, fineReceiverBlock, *dir, gpuDataBuffer, nullptr);
+                  }
+               }
+               else
+               {
+                  auto nProcess              = mpi::MPIRank(coarseBlock->getNeighborProcess(neighborIdx, n));
+                  GpuBuffer_T& gpuDataBuffer = bufferSystemGPU_[COARSE_TO_FINE][index].sendBuffer(nProcess);
+                  for (auto& pi : packInfos_)
+                  {
+                     WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+                     WALBERLA_ASSERT_GREATER_EQUAL(gpuDataBuffer.allocSize() - gpuDataBuffer.size(), pi->sizeCoarseToFineSend(coarseBlock, fineReceiverId, *dir))
+
+                     pi->packDataCoarseToFine(coarseBlock, fineReceiverId, *dir, gpuDataBuffer);
+
+                     if (!sendFromGPU_)
+                     {
+                        // Mirror the packed segment into the pinned host buffer
+                        // so MPI can send from the CPU.
+                        auto gpuDataPtr = gpuDataBuffer.cur();
+                        auto size = pi->sizeCoarseToFineSend(coarseBlock, fineReceiverId, *dir);
+                        auto cpuDataPtr =
+                           bufferSystemCPU_[COARSE_TO_FINE][index].sendBuffer(nProcess).advanceNoResize(size);
+                        WALBERLA_ASSERT_NOT_NULLPTR(cpuDataPtr)
+                        WALBERLA_GPU_CHECK(gpuMemcpyAsync(cpuDataPtr, gpuDataPtr, size, gpuMemcpyDeviceToHost))
+                     }
+                  }
+               }
+            }
+         }
+         // NOTE(review): the local scratch buffer is reset after EVERY block,
+         // but the asynchronous kernels/copies issued above are only
+         // synchronized after the whole loop (gpuDeviceSynchronize below) —
+         // confirm reusing the buffer across blocks is safe here.
+         localBuffer_[COARSE_TO_FINE][index].clear();
+      }
+   }
+
+   // wait for packing to finish
+   WALBERLA_GPU_CHECK( gpuDeviceSynchronize() )
+
+   if (sendFromGPU_)
+      bufferSystemGPU_[COARSE_TO_FINE][index].sendAll();
+   else
+      bufferSystemCPU_[COARSE_TO_FINE][index].sendAll();
+
+   // NOTE(review): redundant — already set at the top of this function.
+   communicationInProgress_[COARSE_TO_FINE][index] = true;
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::startCommunicationFineToCoarse(const uint_t index, const uint_t finestLevel)
+{
+   // Starts the non-blocking fine-to-coarse exchange across a refinement
+   // boundary. `index` selects the buffer set (indexed by the fine level),
+   // `finestLevel` is the level of the sending (fine) blocks. Mirrors
+   // startCommunicationCoarseToFine with the roles of the blocks reversed.
+   if (packInfos_.empty()) return;
+
+   WALBERLA_ASSERT(!communicationInProgress_[FINE_TO_COARSE][index])
+
+   communicationInProgress_[FINE_TO_COARSE][index] = true;
+
+   // NOTE(review): lock() result is used below without a null check.
+   auto forest = blockForest_.lock();
+
+   // Schedule Receives
+   if (sendFromGPU_)
+      bufferSystemGPU_[FINE_TO_COARSE][index].scheduleReceives();
+   else
+      bufferSystemCPU_[FINE_TO_COARSE][index].scheduleReceives();
+
+   // GPU buffers double as packing scratch space when sending from the CPU.
+   if (!sendFromGPU_)
+      for (auto it : headers_[FINE_TO_COARSE][index])
+         bufferSystemGPU_[FINE_TO_COARSE][index].sendBuffer(it.first).clear();
+
+   // Start filling send buffers
+   {
+      for (auto& iBlock : *forest)
+      {
+         auto fineBlock = dynamic_cast< Block* >(&iBlock);
+         auto nLevel    = fineBlock->getLevel();
+
+         if (!selectable::isSetSelected(fineBlock->getState(), requiredBlockSelectors_, incompatibleBlockSelectors_))
+            continue;
+
+         // Only blocks on the fine side of this refinement boundary send.
+         if (nLevel != finestLevel) continue;
+
+         for (auto dir = Stencil::beginNoCenter(); dir != Stencil::end(); ++dir)
+         {
+            const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir);
+
+            if (fineBlock->getNeighborhoodSectionSize(neighborIdx) == uint_t(0)) continue;
+            // A fine block has at most one larger (coarser) neighbor per section.
+            if (!(fineBlock->neighborhoodSectionHasLargerBlock(neighborIdx))) continue;
+            WALBERLA_ASSERT_EQUAL(fineBlock->getNeighborhoodSectionSize(neighborIdx), uint_t(1))
+
+            const BlockID& coarseReceiverId = fineBlock->getNeighborId(neighborIdx, uint_t(0));
+            if (!selectable::isSetSelected(fineBlock->getNeighborState(neighborIdx, uint_t(0)), requiredBlockSelectors_,
+                                           incompatibleBlockSelectors_))
+               continue;
+            if( fineBlock->neighborExistsLocally( neighborIdx, uint_t(0) ) )
+            {
+               auto coarseReceiverBlock = dynamic_cast< Block * >( forest->getBlock( coarseReceiverId ) );
+               //               for (auto& pi : packInfos_)
+               //               {
+               //                  pi->communicateLocalFineToCoarse(fineBlock, coarseReceiverBlock, *dir);
+               //               }
+
+               // Local refinement exchange goes through the shared device
+               // scratch buffer instead of the MPI buffer systems.
+               GpuBuffer_T& gpuDataBuffer = localBuffer_[FINE_TO_COARSE][index];
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+               for (auto& pi : packInfos_)
+               {
+                  WALBERLA_ASSERT_GREATER_EQUAL(gpuDataBuffer.allocSize() - gpuDataBuffer.size(), pi->sizeFineToCoarseSend(fineBlock, *dir))
+                  pi->communicateLocalFineToCoarse(fineBlock, coarseReceiverBlock, *dir, gpuDataBuffer, nullptr);
+               }
+            }
+            else
+            {
+               auto nProcess              = mpi::MPIRank(fineBlock->getNeighborProcess(neighborIdx, uint_t(0)));
+               GpuBuffer_T& gpuDataBuffer = bufferSystemGPU_[FINE_TO_COARSE][index].sendBuffer(nProcess);
+
+               for (auto& pi : packInfos_)
+               {
+                  WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+                  WALBERLA_ASSERT_GREATER_EQUAL(gpuDataBuffer.allocSize() - gpuDataBuffer.size(), pi->sizeFineToCoarseSend(fineBlock, *dir))
+
+                  pi->packDataFineToCoarse(fineBlock, coarseReceiverId, *dir, gpuDataBuffer);
+
+                  if (!sendFromGPU_)
+                  {
+                     // Mirror the packed segment into the pinned host buffer
+                     // so MPI can send from the CPU.
+                     auto gpuDataPtr = gpuDataBuffer.cur();
+                     auto size = pi->sizeFineToCoarseSend(fineBlock, *dir);
+                     auto cpuDataPtr = bufferSystemCPU_[FINE_TO_COARSE][index].sendBuffer(nProcess).advanceNoResize(size);
+                     WALBERLA_ASSERT_NOT_NULLPTR(cpuDataPtr)
+                     WALBERLA_GPU_CHECK(gpuMemcpyAsync(cpuDataPtr, gpuDataPtr, size, gpuMemcpyDeviceToHost))
+                  }
+               }
+            }
+         }
+         // NOTE(review): scratch buffer reset per block while async work is
+         // only synchronized after the loop — confirm reuse is safe (see the
+         // analogous spot in startCommunicationCoarseToFine).
+         localBuffer_[FINE_TO_COARSE][index].clear();
+      }
+   }
+
+   // wait for packing to finish
+   WALBERLA_GPU_CHECK( gpuDeviceSynchronize() )
+
+   if (sendFromGPU_)
+      bufferSystemGPU_[FINE_TO_COARSE][index].sendAll();
+   else
+      bufferSystemCPU_[FINE_TO_COARSE][index].sendAll();
+
+   // NOTE(review): redundant — already set at the top of this function.
+   communicationInProgress_[FINE_TO_COARSE][index] = true;
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::waitCommunicateEqualLevel(const uint_t level)
+{
+   // Completes the equal-level exchange started for `level`: waits for the
+   // incoming messages and unpacks each segment into its receiver block via
+   // the registered pack infos. No-op if nothing is in flight.
+   if (!communicationInProgress_[EQUAL_LEVEL][level] || packInfos_.empty()) return;
+
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   WALBERLA_ASSERT_LESS(level, forest->getNumberOfLevels())
+
+   if (sendFromGPU_)
+   {
+      // Data was received directly into GPU buffers; unpack in place.
+      // auto parallelSection = parallelSectionManager_.parallelSection( nullptr );
+      for (auto recvInfo = bufferSystemGPU_[EQUAL_LEVEL][level].begin();
+           recvInfo != bufferSystemGPU_[EQUAL_LEVEL][level].end(); ++recvInfo)
+      {
+         recvInfo.buffer().clear();
+         // The header list for this rank records which block/direction each
+         // packed segment belongs to, in packing order.
+         for (auto& header : headers_[EQUAL_LEVEL][level][recvInfo.rank()])
+         {
+            auto block = dynamic_cast< Block* >(forest->getBlock(header.receiverId));
+
+            for (auto& pi : packInfos_)
+            {
+               GpuBuffer_T& gpuDataBuffer = recvInfo.buffer();
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+               // parallelSection.run([&](auto s) {
+               pi->unpackDataEqualLevel(block, stencil::inverseDir[header.dir], gpuDataBuffer);
+               // });
+            }
+         }
+      }
+   }
+   else
+   {
+      // Data arrived in pinned CPU buffers: copy each segment back to the
+      // device (reusing this rank's GPU send buffer as staging space) and
+      // unpack from there.
+      for (auto recvInfo = bufferSystemCPU_[EQUAL_LEVEL][level].begin();
+           recvInfo != bufferSystemCPU_[EQUAL_LEVEL][level].end(); ++recvInfo)
+      {
+         auto& gpuBuffer = bufferSystemGPU_[EQUAL_LEVEL][level].sendBuffer(recvInfo.rank());
+
+         recvInfo.buffer().clear();
+         gpuBuffer.clear();
+         for (auto& header : headers_[EQUAL_LEVEL][level][recvInfo.rank()])
+         {
+            auto block       = dynamic_cast< Block* >(forest->getBlock(header.receiverId));
+            auto senderBlock = dynamic_cast< Block* >(forest->getBlock(header.senderId));
+
+            for (auto& pi : packInfos_)
+            {
+               // Segment size is recomputed from the sender block, mirroring
+               // how the data was packed on the sending side.
+               auto size       = pi->sizeEqualLevelSend(senderBlock, header.dir);
+               auto cpuDataPtr = recvInfo.buffer().advanceNoResize(size);
+               auto gpuDataPtr = gpuBuffer.cur(); // advanceNoResize( size );
+               WALBERLA_ASSERT_NOT_NULLPTR(cpuDataPtr)
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataPtr)
+
+               WALBERLA_GPU_CHECK(gpuMemcpyAsync(gpuDataPtr, cpuDataPtr, size, gpuMemcpyHostToDevice, nullptr))
+               pi->unpackDataEqualLevel(block, stencil::inverseDir[header.dir], gpuBuffer);
+            }
+         }
+      }
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+   }
+   communicationInProgress_[EQUAL_LEVEL][level] = false;
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::waitCommunicateCoarseToFine(const uint_t fineLevel)
+{
+   // Completes the coarse-to-fine exchange indexed by `fineLevel`: waits for
+   // incoming messages and unpacks each segment into its fine receiver block.
+   // No-op if nothing is in flight.
+   if (!communicationInProgress_[COARSE_TO_FINE][fineLevel] || packInfos_.empty()) return;
+
+   WALBERLA_ASSERT_GREATER(fineLevel, uint_t(0))
+
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   WALBERLA_ASSERT_LESS(fineLevel, forest->getNumberOfLevels())
+
+   if (sendFromGPU_)
+   {
+      // Data was received directly into GPU buffers; unpack in place.
+      // auto parallelSection = parallelSectionManager_.parallelSection( nullptr );
+      for (auto recvInfo = bufferSystemGPU_[COARSE_TO_FINE][fineLevel].begin();
+           recvInfo != bufferSystemGPU_[COARSE_TO_FINE][fineLevel].end(); ++recvInfo)
+      {
+         recvInfo.buffer().clear();
+         for (auto& header : headers_[COARSE_TO_FINE][fineLevel][recvInfo.rank()])
+         {
+            auto block       = dynamic_cast< Block* >(forest->getBlock(header.receiverId));
+            auto senderBlock = dynamic_cast< Block* >(forest->getBlock(header.senderId));
+
+            for (auto& pi : packInfos_)
+            {
+               // auto size = pi->sizeCoarseToFineSend( senderBlock, block->getId(), header.dir );
+               GpuBuffer_T& gpuDataBuffer = recvInfo.buffer();
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+               // parallelSection.run([&](auto s) {
+               pi->unpackDataCoarseToFine(block, senderBlock->getId(), stencil::inverseDir[header.dir], gpuDataBuffer);
+               // });
+            }
+         }
+      }
+   }
+   else
+   {
+      // CPU receive path: copy segments back to the device (reusing the GPU
+      // send buffer of this rank as staging space) and unpack, overlapping
+      // copies/kernels through the parallel-section streams.
+      auto parallelSection = parallelSectionManager_.parallelSection(nullptr);
+      for (auto recvInfo = bufferSystemCPU_[COARSE_TO_FINE][fineLevel].begin();
+           recvInfo != bufferSystemCPU_[COARSE_TO_FINE][fineLevel].end(); ++recvInfo)
+      {
+         auto& gpuBuffer = bufferSystemGPU_[COARSE_TO_FINE][fineLevel].sendBuffer(recvInfo.rank());
+
+         recvInfo.buffer().clear();
+         gpuBuffer.clear();
+         for (auto& header : headers_[COARSE_TO_FINE][fineLevel][recvInfo.rank()])
+         {
+            auto block       = dynamic_cast< Block* >(forest->getBlock(header.receiverId));
+            auto senderBlock = dynamic_cast< Block* >(forest->getBlock(header.senderId));
+
+            for (auto& pi : packInfos_)
+            {
+               auto size       = pi->sizeCoarseToFineSend(senderBlock, block->getId(), header.dir);
+               auto cpuDataPtr = recvInfo.buffer().advanceNoResize(size);
+               auto gpuDataPtr = gpuBuffer.cur(); // advanceNoResize( size );
+               WALBERLA_ASSERT_NOT_NULLPTR(cpuDataPtr)
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataPtr)
+
+               // NOTE(review): the lambda captures the loop-local pointers and
+               // size by reference — assumes run() executes before the next
+               // iteration mutates them; verify against ParallelStreams.
+               parallelSection.run([&](auto s) {
+                  WALBERLA_GPU_CHECK(gpuMemcpyAsync(gpuDataPtr, cpuDataPtr, size, gpuMemcpyHostToDevice, s))
+                  pi->unpackDataCoarseToFine(block, senderBlock->getId(), stencil::inverseDir[header.dir], gpuBuffer);
+               });
+            }
+         }
+      }
+   }
+   communicationInProgress_[COARSE_TO_FINE][fineLevel] = false;
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::waitCommunicateFineToCoarse(const uint_t fineLevel)
+{
+   // Completes the fine-to-coarse exchange indexed by `fineLevel`: waits for
+   // incoming messages and unpacks each segment into its coarse receiver
+   // block. No-op if nothing is in flight.
+   if (!communicationInProgress_[FINE_TO_COARSE][fineLevel] || packInfos_.empty()) return;
+
+   WALBERLA_ASSERT_GREATER(fineLevel, uint_t(0))
+
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   WALBERLA_ASSERT_LESS(fineLevel, forest->getNumberOfLevels())
+   // WALBERLA_ASSERT_EQUAL( forestModificationStamp_, forest->getBlockForest().getModificationStamp() );
+
+   if (sendFromGPU_)
+   {
+      // Data was received directly into GPU buffers; unpack in place.
+      // auto parallelSection = parallelSectionManager_.parallelSection( nullptr );
+      for (auto recvInfo = bufferSystemGPU_[FINE_TO_COARSE][fineLevel].begin();
+           recvInfo != bufferSystemGPU_[FINE_TO_COARSE][fineLevel].end(); ++recvInfo)
+      {
+         recvInfo.buffer().clear();
+         for (auto& header : headers_[FINE_TO_COARSE][fineLevel][recvInfo.rank()])
+         {
+            auto block       = dynamic_cast< Block* >(forest->getBlock(header.receiverId));
+            auto senderBlock = dynamic_cast< Block* >(forest->getBlock(header.senderId));
+
+            for (auto& pi : packInfos_)
+            {
+               GpuBuffer_T& gpuDataBuffer = recvInfo.buffer();
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataBuffer.cur())
+               // parallelSection.run([&](auto s) {
+               pi->unpackDataFineToCoarse(block, senderBlock->getId(), stencil::inverseDir[header.dir], gpuDataBuffer);
+               // });
+            }
+         }
+      }
+   }
+   else
+   {
+      // CPU receive path: stage segments back to the device via the GPU send
+      // buffer of this rank and unpack, using the parallel-section streams.
+      auto parallelSection = parallelSectionManager_.parallelSection(nullptr);
+      for (auto recvInfo = bufferSystemCPU_[FINE_TO_COARSE][fineLevel].begin();
+           recvInfo != bufferSystemCPU_[FINE_TO_COARSE][fineLevel].end(); ++recvInfo)
+      {
+         auto& gpuBuffer = bufferSystemGPU_[FINE_TO_COARSE][fineLevel].sendBuffer(recvInfo.rank());
+
+         recvInfo.buffer().clear();
+         gpuBuffer.clear();
+         for (auto& header : headers_[FINE_TO_COARSE][fineLevel][recvInfo.rank()])
+         {
+            auto block       = dynamic_cast< Block* >(forest->getBlock(header.receiverId));
+            auto senderBlock = dynamic_cast< Block* >(forest->getBlock(header.senderId));
+
+            for (auto& pi : packInfos_)
+            {
+               // Segment size mirrors what the fine sender packed.
+               auto size       = pi->sizeFineToCoarseSend(senderBlock, header.dir);
+               auto cpuDataPtr = recvInfo.buffer().advanceNoResize(size);
+               auto gpuDataPtr = gpuBuffer.cur(); // advanceNoResize( size );
+               WALBERLA_ASSERT_NOT_NULLPTR(cpuDataPtr)
+               WALBERLA_ASSERT_NOT_NULLPTR(gpuDataPtr)
+
+               // NOTE(review): by-reference capture of loop locals — see the
+               // analogous note in waitCommunicateCoarseToFine.
+               parallelSection.run([&](auto s) {
+                  WALBERLA_GPU_CHECK(gpuMemcpyAsync(gpuDataPtr, cpuDataPtr, size, gpuMemcpyHostToDevice, s))
+                  pi->unpackDataFineToCoarse(block, senderBlock->getId(), stencil::inverseDir[header.dir], gpuBuffer);
+               });
+            }
+         }
+      }
+   }
+   communicationInProgress_[FINE_TO_COARSE][fineLevel] = false;
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::setupCommunication()
+{
+   // (Re)builds all communication data structures: walks every local block
+   // and stencil direction, classifies each neighbor link as equal-level,
+   // coarse-to-fine or fine-to-coarse, accumulates per-rank message sizes,
+   // exchanges the (receiverId, senderId, dir) headers with the neighbor
+   // ranks, and finally sizes the CPU/GPU buffer systems and the local
+   // scratch buffers accordingly.
+   WALBERLA_ASSERT_GREATER(packInfos_.size(), uint_c(0),
+                           "You have not registered a packInfo yet, thus setupCommunication does not work yet.")
+   auto forest = blockForest_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(forest,
+                              "Trying to access communication for a block storage object that doesn't exist anymore")
+   const uint_t levels = forest->getNumberOfLevels();
+
+   std::vector< std::vector< std::map< mpi::MPIRank, mpi::MPISize > > >
+      receiverInfo; // how many bytes to send to each neighbor
+   std::vector< std::vector< mpi::BufferSystem > > headerExchangeBs;
+
+   // One entry per communication case (EQUAL_LEVEL, COARSE_TO_FINE,
+   // FINE_TO_COARSE), each with levels+1 per-level slots.
+   receiverInfo.resize(3);
+   receiverInfo[EQUAL_LEVEL].resize(levels + uint_c(1));
+   receiverInfo[COARSE_TO_FINE].resize(levels + uint_c(1));
+   receiverInfo[FINE_TO_COARSE].resize(levels + uint_c(1));
+
+   // Bytes needed for process-local refinement transfers per case and level.
+   std::vector< std::vector< mpi::MPISize > > localBufferSize;
+
+   headerExchangeBs.resize(3);
+   localBufferSize.resize(3);
+
+   for (uint_t j = 0; j <= levels; ++j)
+   {
+      // Tag 123 is only used for this one-off header exchange.
+      headerExchangeBs[EQUAL_LEVEL].push_back(mpi::BufferSystem(mpi::MPIManager::instance()->comm(), 123));
+      headerExchangeBs[COARSE_TO_FINE].push_back(mpi::BufferSystem(mpi::MPIManager::instance()->comm(), 123));
+      headerExchangeBs[FINE_TO_COARSE].push_back(mpi::BufferSystem(mpi::MPIManager::instance()->comm(), 123))
+
+      localBufferSize[EQUAL_LEVEL].push_back(mpi::MPISize(0));
+      localBufferSize[COARSE_TO_FINE].push_back(mpi::MPISize(0));
+      localBufferSize[FINE_TO_COARSE].push_back(mpi::MPISize(0));
+   }
+
+   for (auto& iBlock : *forest)
+   {
+      auto block = dynamic_cast< Block* >(&iBlock);
+      if (!selectable::isSetSelected(block->getState(), requiredBlockSelectors_, incompatibleBlockSelectors_)) continue;
+
+      const BlockID& senderId = block->getId();
+      auto nLevel             = block->getLevel();
+
+      for (auto dir = Stencil::beginNoCenter(); dir != Stencil::end(); ++dir)
+      {
+         // skip if block has no neighbors in this direction
+         const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir);
+         if (block->getNeighborhoodSectionSize(neighborIdx) == uint_t(0)) continue;
+
+         if (block->neighborhoodSectionHasEquallySizedBlock(neighborIdx))
+         {
+            // Equal-level link: only remote neighbors contribute buffer space
+            // and headers; local ones are exchanged directly at runtime.
+            WALBERLA_ASSERT_EQUAL(block->getNeighborhoodSectionSize(neighborIdx), uint_t(1))
+            if (!selectable::isSetSelected(block->getNeighborState(neighborIdx, uint_t(0)), requiredBlockSelectors_,
+                                           incompatibleBlockSelectors_))
+               continue;
+            if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) )
+               continue;
+
+            const BlockID& receiverId = block->getNeighborId(neighborIdx, uint_t(0));
+            auto nProcess             = mpi::MPIRank(block->getNeighborProcess(neighborIdx, uint_t(0)));
+
+            for (auto& pi : packInfos_)
+            {
+               receiverInfo[EQUAL_LEVEL][nLevel][nProcess] += mpi::MPISize(pi->sizeEqualLevelSend(block, *dir));
+            }
+
+            auto& headerBuffer = headerExchangeBs[EQUAL_LEVEL][nLevel].sendBuffer(nProcess);
+            receiverId.toBuffer(headerBuffer);
+            senderId.toBuffer(headerBuffer);
+            headerBuffer << *dir;
+         }
+         else if (block->neighborhoodSectionHasSmallerBlocks(neighborIdx))
+         {
+            auto fineLevel = nLevel + uint_c(1); // For indexing always the fineLevel is taken to be consistent.
+            WALBERLA_ASSERT_LESS(fineLevel, levels)
+
+            for (uint_t n = 0; n != block->getNeighborhoodSectionSize(neighborIdx); ++n)
+            {
+               const BlockID& receiverId = block->getNeighborId(neighborIdx, n);
+               if (!selectable::isSetSelected(block->getNeighborState(neighborIdx, n), requiredBlockSelectors_,
+                                              incompatibleBlockSelectors_))
+                  continue;
+               if( block->neighborExistsLocally( neighborIdx, n ) )
+               {
+                  // Local fine neighbor: accounted in the device scratch
+                  // buffer, no MPI message.
+                  for (auto& pi : packInfos_)
+                     localBufferSize[COARSE_TO_FINE][fineLevel] += mpi::MPISize(pi->sizeCoarseToFineSend(block, receiverId, *dir));
+                  continue;
+               }
+
+               auto nProcess = mpi::MPIRank(block->getNeighborProcess(neighborIdx, n));
+               for (auto& pi : packInfos_)
+                  receiverInfo[COARSE_TO_FINE][fineLevel][nProcess] +=
+                     mpi::MPISize(pi->sizeCoarseToFineSend(block, receiverId, *dir));
+               auto& headerBuffer = headerExchangeBs[COARSE_TO_FINE][fineLevel].sendBuffer(nProcess);
+               receiverId.toBuffer(headerBuffer);
+               senderId.toBuffer(headerBuffer);
+               headerBuffer << *dir;
+            }
+         }
+         else if (block->neighborhoodSectionHasLargerBlock(neighborIdx))
+         {
+            WALBERLA_ASSERT_EQUAL(block->getNeighborhoodSectionSize(neighborIdx), uint_t(1))
+
+            const BlockID& receiverId = block->getNeighborId(neighborIdx, uint_t(0));
+            if (!selectable::isSetSelected(block->getNeighborState(neighborIdx, uint_t(0)), requiredBlockSelectors_,
+                                           incompatibleBlockSelectors_))
+               continue;
+
+            if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) )
+            {
+               // Local coarse neighbor: accounted in the device scratch
+               // buffer, no MPI message.
+               for (auto& pi : packInfos_)
+                  localBufferSize[FINE_TO_COARSE][nLevel] += mpi::MPISize(pi->sizeFineToCoarseSend(block, *dir));
+               continue;
+            }
+
+            auto nProcess = mpi::MPIRank(block->getNeighborProcess(neighborIdx, uint_t(0)));
+            for (auto& pi : packInfos_)
+               receiverInfo[FINE_TO_COARSE][nLevel][nProcess] += mpi::MPISize(pi->sizeFineToCoarseSend(block, *dir));
+
+            auto& headerBuffer = headerExchangeBs[FINE_TO_COARSE][nLevel].sendBuffer(nProcess);
+            receiverId.toBuffer(headerBuffer);
+            senderId.toBuffer(headerBuffer);
+            headerBuffer << *dir;
+         }
+      }
+   }
+
+   // Exchange headers with neighbor ranks and size all buffer systems.
+   for (uint_t i = 0; i != 3; ++i)
+   {
+      for (uint_t j = 0; j <= levels; ++j)
+      {
+         headerExchangeBs[i][j].setReceiverInfoFromSendBufferState(false, true);
+         headerExchangeBs[i][j].sendAll();
+         for (auto recvIter = headerExchangeBs[i][j].begin(); recvIter != headerExchangeBs[i][j].end(); ++recvIter)
+         {
+            auto& headerVector = headers_[i][j][recvIter.rank()];
+            auto& buffer       = recvIter.buffer();
+            while (buffer.size())
+            {
+               Header header;
+               header.receiverId.fromBuffer(buffer);
+               header.senderId.fromBuffer(buffer);
+               buffer >> header.dir;
+               headerVector.push_back(header);
+            }
+         }
+
+         bufferSystemCPU_[i][j].setReceiverInfo(receiverInfo[i][j]);
+         bufferSystemGPU_[i][j].setReceiverInfo(receiverInfo[i][j]);
+
+         // Pre-size the send buffers so runtime packing never reallocates.
+         for (auto it : receiverInfo[i][j])
+         {
+            bufferSystemCPU_[i][j].sendBuffer(it.first).resize(size_t(it.second));
+            bufferSystemGPU_[i][j].sendBuffer(it.first).resize(size_t(it.second));
+         }
+         if (localBufferSize[i][j] > 0)
+            localBuffer_[i][j].resize(size_t(localBufferSize[i][j]));
+      }
+   }
+
+   // Remember the forest state so later calls can detect refinement changes.
+   forestModificationStamp_      = forest->getBlockForest().getModificationStamp();
+}
+
+template< typename Stencil >
+bool NonUniformGPUScheme< Stencil >::isAnyCommunicationInProgress() const
+{
+   // True if any of the three communication cases (equal level,
+   // coarse-to-fine, fine-to-coarse) has an unfinished exchange on any level.
+   for (const auto& perCase : communicationInProgress_)
+      for (const auto levelFlag : perCase)
+         if (levelFlag) return true;
+
+   return false;
+}
+
+template< typename Stencil >
+NonUniformGPUScheme< Stencil >::~NonUniformGPUScheme()
+{
+   // Finish every pending exchange of all three communication cases before
+   // the buffer systems are torn down; each wait call is a no-op if nothing
+   // is in flight on that level.
+   const uint_t numLevels = bufferSystemGPU_[EQUAL_LEVEL].size();
+   for (uint_t level = 0; level < numLevels; ++level)
+   {
+      waitCommunicateEqualLevel(level);
+      waitCommunicateCoarseToFine(level);
+      waitCommunicateFineToCoarse(level);
+   }
+}
+
+template< typename Stencil >
+void NonUniformGPUScheme< Stencil >::addPackInfo(const shared_ptr< GeneratedNonUniformGPUPackInfo >& pi)
+{
+   // Registering a pack info changes every message size, so it is only
+   // permitted while no exchange is pending; afterwards all communication
+   // structures are rebuilt from scratch.
+   if (isAnyCommunicationInProgress())
+      WALBERLA_ABORT("You may not add a PackInfo to a NonUniformBufferedScheme if any communication is in progress!")
+
+   packInfos_.push_back(pi);
+   setupCommunication();
+}
+
+} // namespace walberla::gpu::communication
diff --git a/src/gpu/communication/UniformGPUScheme.h b/src/gpu/communication/UniformGPUScheme.h
index e53e6772b4ccd5a3e04da5a632cbafd8ceb206d3..5c9604ccd8cc00e5cdb2d9f9c1085ace2f2e44a5 100644
--- a/src/gpu/communication/UniformGPUScheme.h
+++ b/src/gpu/communication/UniformGPUScheme.h
@@ -32,7 +32,7 @@
 
 #include <thread>
 
-#include "gpu/CudaRAII.h"
+#include "gpu/GPURAII.h"
 #include "gpu/GPUWrapper.h"
 #include "gpu/ParallelStreams.h"
 #include "gpu/communication/CustomMemoryBuffer.h"
@@ -51,12 +51,14 @@ namespace communication {
    public:
        explicit UniformGPUScheme( weak_ptr<StructuredBlockForest> bf,
                                   bool sendDirectlyFromGPU = false,
+                                  bool useLocalCommunication = true,
                                   const int tag = 5432 );
 
        explicit UniformGPUScheme( weak_ptr<StructuredBlockForest> bf,
                                  const Set<SUID> & requiredBlockSelectors,
                                  const Set<SUID> & incompatibleBlockSelectors,
                                  bool sendDirectlyFromGPU = false,
+                                 bool useLocalCommunication = true,
                                  const int tag = 5432 );
 
        void addPackInfo( const shared_ptr<GeneratedGPUPackInfo> &pi );
@@ -71,7 +73,6 @@ namespace communication {
        std::function<void()> getStartCommunicateFunctor( gpuStream_t stream = nullptr );
        std::function<void()> getWaitFunctor( gpuStream_t stream = nullptr );
 
-
    private:
        void setupCommunication();
 
@@ -81,6 +82,7 @@ namespace communication {
        bool setupBeforeNextCommunication_;
        bool communicationInProgress_;
        bool sendFromGPU_;
+       bool useLocalCommunication_;
 
        using CpuBuffer_T = gpu::communication::PinnedMemoryBuffer;
        using GpuBuffer_T = gpu::communication::GPUMemoryBuffer;
diff --git a/src/gpu/communication/UniformGPUScheme.impl.h b/src/gpu/communication/UniformGPUScheme.impl.h
index c8e81cb23e14e15e7fd79b5ec7fb052137b600a9..a12017cf77eca51af31bd967df2af914ae7f28a1 100644
--- a/src/gpu/communication/UniformGPUScheme.impl.h
+++ b/src/gpu/communication/UniformGPUScheme.impl.h
@@ -30,11 +30,13 @@ namespace communication {
    template<typename Stencil>
    UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf,
                                                 bool sendDirectlyFromGPU,
+                                                bool useLocalCommunication,
                                                 const int tag )
         : blockForest_( bf ),
           setupBeforeNextCommunication_( true ),
           communicationInProgress_( false ),
           sendFromGPU_( sendDirectlyFromGPU ),
+          useLocalCommunication_(useLocalCommunication),
           bufferSystemCPU_( mpi::MPIManager::instance()->comm(), tag ),
           bufferSystemGPU_( mpi::MPIManager::instance()->comm(), tag ),
           parallelSectionManager_( -1 ),
@@ -47,11 +49,13 @@ namespace communication {
                                                 const Set<SUID> & requiredBlockSelectors,
                                                 const Set<SUID> & incompatibleBlockSelectors,
                                                 bool sendDirectlyFromGPU,
+                                                bool useLocalCommunication,
                                                 const int tag )
       : blockForest_( bf ),
         setupBeforeNextCommunication_( true ),
         communicationInProgress_( false ),
         sendFromGPU_( sendDirectlyFromGPU ),
+        useLocalCommunication_(useLocalCommunication),
         bufferSystemCPU_( mpi::MPIManager::instance()->comm(), tag ),
         bufferSystemGPU_( mpi::MPIManager::instance()->comm(), tag ),
         parallelSectionManager_( -1 ),
@@ -86,28 +90,40 @@ namespace communication {
          auto parallelSection = parallelSectionManager_.parallelSection( stream );
          for( auto &iBlock : *forest )
          {
-            auto block = dynamic_cast< Block * >( &iBlock );
+            auto senderBlock = dynamic_cast< Block * >( &iBlock );
 
-            if( !selectable::isSetSelected( block->getState(), requiredBlockSelectors_, incompatibleBlockSelectors_ ) )
+            if( !selectable::isSetSelected( senderBlock->getState(), requiredBlockSelectors_, incompatibleBlockSelectors_ ) )
                continue;
 
             for( auto dir = Stencil::beginNoCenter(); dir != Stencil::end(); ++dir )
             {
                const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex( *dir );
-               if( block->getNeighborhoodSectionSize( neighborIdx ) == uint_t( 0 ))
+
+               if( senderBlock->getNeighborhoodSectionSize( neighborIdx ) == uint_t( 0 ))
                   continue;
-               auto nProcess = mpi::MPIRank( block->getNeighborProcess( neighborIdx, uint_t( 0 )));
 
-               if( !selectable::isSetSelected( block->getNeighborState( neighborIdx, uint_t(0) ), requiredBlockSelectors_, incompatibleBlockSelectors_ ) )
+               if( !selectable::isSetSelected( senderBlock->getNeighborState( neighborIdx, uint_t(0) ), requiredBlockSelectors_, incompatibleBlockSelectors_ ) )
                   continue;
 
-               for( auto &pi : packInfos_ )
+               if( senderBlock->neighborExistsLocally( neighborIdx, uint_t(0) ) && useLocalCommunication_ )
                {
-                  parallelSection.run([&](auto s) {
-                     auto size = pi->size( *dir, block );
+                  auto receiverBlock = dynamic_cast< Block * >( forest->getBlock( senderBlock->getNeighborId( neighborIdx, uint_t(0) )) );
+                  for (auto& pi : packInfos_)
+                  {
+                     pi->communicateLocal(*dir, senderBlock, receiverBlock, stream);
+                  }
+               }
+               else
+               {
+                  auto nProcess = mpi::MPIRank( senderBlock->getNeighborProcess( neighborIdx, uint_t( 0 )));
+
+                  for( auto &pi : packInfos_ )
+                  {
+                     parallelSection.run([&](auto s) {
+                     auto size = pi->size( *dir, senderBlock );
                      auto gpuDataPtr = bufferSystemGPU_.sendBuffer( nProcess ).advanceNoResize( size );
                      WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr )
-                     pi->pack( *dir, gpuDataPtr, block, s );
+                     pi->pack( *dir, gpuDataPtr, senderBlock, s );
 
                      if( !sendFromGPU_ )
                      {
@@ -115,12 +131,12 @@ namespace communication {
                         WALBERLA_ASSERT_NOT_NULLPTR( cpuDataPtr )
                         WALBERLA_GPU_CHECK( gpuMemcpyAsync( cpuDataPtr, gpuDataPtr, size, gpuMemcpyDeviceToHost, s ))
                      }
-                  });
+                     });
+                  }
                }
             }
          }
       }
-
       // wait for packing to finish
       WALBERLA_GPU_CHECK( gpuStreamSynchronize( stream ) );
 
@@ -181,7 +197,6 @@ namespace communication {
                   auto gpuDataPtr = gpuBuffer.advanceNoResize( size );
                   WALBERLA_ASSERT_NOT_NULLPTR( cpuDataPtr )
                   WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr )
-
                   parallelSection.run([&](auto s) {
                      WALBERLA_GPU_CHECK( gpuMemcpyAsync( gpuDataPtr, cpuDataPtr, size,
                                                            gpuMemcpyHostToDevice, s ))
@@ -192,6 +207,7 @@ namespace communication {
          }
       }
 
+      WALBERLA_GPU_CHECK( gpuDeviceSynchronize() )
       communicationInProgress_ = false;
    }
 
@@ -216,6 +232,7 @@ namespace communication {
          for( auto dir = Stencil::beginNoCenter(); dir != Stencil::end(); ++dir ) {
             // skip if block has no neighbors in this direction
             const auto neighborIdx = blockforest::getBlockNeighborhoodSectionIndex( *dir );
+
             if( block->getNeighborhoodSectionSize( neighborIdx ) == uint_t( 0 ))
                continue;
 
@@ -229,6 +246,9 @@ namespace communication {
             if( !selectable::isSetSelected( block->getNeighborState( neighborIdx, uint_t(0) ), requiredBlockSelectors_, incompatibleBlockSelectors_ ) )
                continue;
 
+            if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) && useLocalCommunication_ )
+               continue;
+
             auto nProcess = mpi::MPIRank( block->getNeighborProcess( neighborIdx, uint_t( 0 )));
 
             for( auto &pi : packInfos_ )
@@ -287,7 +307,7 @@ namespace communication {
    }
 
    template< typename Stencil >
-   std::function<void()> UniformGPUScheme<Stencil>::getWaitFunctor(gpuStream_t stream)
+   std::function<void()> UniformGPUScheme<Stencil>::getWaitFunctor(gpuStream_t stream)
    {
       return [this, stream]() { wait( stream ); };
    }
diff --git a/src/lbm_generated/CMakeLists.txt b/src/lbm_generated/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2513a58f2e646025fa86107409058a7576a3f62f
--- /dev/null
+++ b/src/lbm_generated/CMakeLists.txt
@@ -0,0 +1,25 @@
+add_library( lbm_generated)
+
+target_link_libraries( lbm_generated
+        PUBLIC
+        blockforest
+        boundary
+        communication
+        core
+        domain_decomposition
+        field
+        geometry
+        gui
+        stencil
+        timeloop
+        vtk
+        )
+
+add_subdirectory( boundary )
+add_subdirectory( communication )
+add_subdirectory( gpu )
+add_subdirectory( evaluation )
+add_subdirectory( field )
+add_subdirectory( refinement )
+add_subdirectory( storage_specification )
+add_subdirectory( sweep_collection )
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/CMakeLists.txt b/src/lbm_generated/boundary/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..201337a88fa1547002e0266b8837f369cf893b59
--- /dev/null
+++ b/src/lbm_generated/boundary/CMakeLists.txt
@@ -0,0 +1,25 @@
+target_sources( lbm_generated
+        PRIVATE
+        D3Q19BoundaryCollection.h
+        D3Q27BoundaryCollection.h
+        FreeSlipD3Q19.h
+        FreeSlipD3Q19.cpp
+        FreeSlipD3Q27.h
+        FreeSlipD3Q27.cpp
+        FixedDensityD3Q19.h
+        FixedDensityD3Q19.cpp
+        FixedDensityD3Q27.h
+        FixedDensityD3Q27.cpp
+        NoSlipD3Q19.h
+        NoSlipD3Q19.cpp
+        NoSlipD3Q27.h
+        NoSlipD3Q27.cpp
+        OutflowD3Q19.h
+        OutflowD3Q19.cpp
+        OutflowD3Q27.h
+        OutflowD3Q27.cpp
+        UBBD3Q19.h
+        UBBD3Q19.cpp
+        UBBD3Q27.h
+        UBBD3Q27.cpp
+    )
diff --git a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb1a23fb52be36ec0471bf05989512724acdc477
--- /dev/null
+++ b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q19BoundaryCollection.h
+//! \author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "OutflowD3Q19.h"
+#include "FixedDensityD3Q19.h"
+#include "FreeSlipD3Q19.h"
+#include "NoSlipD3Q19.h"
+#include "UBBD3Q19.h"
+
+
+
+namespace walberla{
+namespace lbm {
+
+template <typename FlagField_T>
+class D3Q19BoundaryCollection
+{
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+
+
+   D3Q19BoundaryCollection(const shared_ptr<StructuredBlockForest> & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z)
+      : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
+   {
+      OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID);
+      FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID, density);
+      FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID);
+      NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID);
+      UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID, u_x, u_y, u_z);
+      
+
+      OutflowD3Q19Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("Outflow"), domainUID);
+      FixedDensityD3Q19Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("FixedDensity"), domainUID);
+      FreeSlipD3Q19Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("FreeSlip"), domainUID);
+      NoSlipD3Q19Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("NoSlip"), domainUID);
+      UBBD3Q19Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("UBB"), domainUID);
+      
+   }
+
+   void run (IBlock * block)
+   {
+      OutflowD3Q19Object->run(block);
+      FixedDensityD3Q19Object->run(block);
+      FreeSlipD3Q19Object->run(block);
+      NoSlipD3Q19Object->run(block);
+      UBBD3Q19Object->run(block);
+      
+   }
+
+   void inner (IBlock * block)
+   {
+      OutflowD3Q19Object->inner(block);
+      FixedDensityD3Q19Object->inner(block);
+      FreeSlipD3Q19Object->inner(block);
+      NoSlipD3Q19Object->inner(block);
+      UBBD3Q19Object->inner(block);
+      
+   }
+
+   void outer (IBlock * block)
+   {
+      OutflowD3Q19Object->outer(block);
+      FixedDensityD3Q19Object->outer(block);
+      FreeSlipD3Q19Object->outer(block);
+      NoSlipD3Q19Object->outer(block);
+      UBBD3Q19Object->outer(block);
+      
+   }
+
+   void operator() (IBlock * block)
+   {
+      run(block);
+   }
+
+   std::function<void (IBlock *)> getSweep(Type type = Type::ALL)
+   {
+      switch (type)
+      {
+      case Type::INNER:
+         return [this](IBlock* block) { this->inner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { this->outer(block); };
+      default:
+         return [this](IBlock* block) { this->run(block); };
+      }
+   }
+
+   weak_ptr< StructuredBlockStorage > blocks_;
+   BlockDataID flagID;
+   BlockDataID pdfsID;
+   walberla::FlagUID domainUID;
+
+   shared_ptr<lbm::OutflowD3Q19> OutflowD3Q19Object;
+   shared_ptr<lbm::FixedDensityD3Q19> FixedDensityD3Q19Object;
+   shared_ptr<lbm::FreeSlipD3Q19> FreeSlipD3Q19Object;
+   shared_ptr<lbm::NoSlipD3Q19> NoSlipD3Q19Object;
+   shared_ptr<lbm::UBBD3Q19> UBBD3Q19Object;
+   
+};
+
+}
+}
diff --git a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
new file mode 100644
index 0000000000000000000000000000000000000000..3428689bda22764cf3552e641d4c1f2656bab37a
--- /dev/null
+++ b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
@@ -0,0 +1,123 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q27BoundaryCollection.h
+//! \author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "OutflowD3Q27.h"
+#include "FixedDensityD3Q27.h"
+#include "FreeSlipD3Q27.h"
+#include "NoSlipD3Q27.h"
+#include "UBBD3Q27.h"
+
+
+
+namespace walberla{
+namespace lbm {
+
+template <typename FlagField_T>
+class D3Q27BoundaryCollection
+{
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+
+
+   D3Q27BoundaryCollection(const shared_ptr<StructuredBlockForest> & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z)
+      : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
+   {
+      OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID);
+      FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID, density);
+      FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID);
+      NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID);
+      UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID, u_x, u_y, u_z);
+      
+
+      OutflowD3Q27Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("Outflow"), domainUID);
+      FixedDensityD3Q27Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("FixedDensity"), domainUID);
+      FreeSlipD3Q27Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("FreeSlip"), domainUID);
+      NoSlipD3Q27Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("NoSlip"), domainUID);
+      UBBD3Q27Object->fillFromFlagField<FlagField_T>(blocks, flagID, walberla::FlagUID("UBB"), domainUID);
+      
+   }
+
+   void run (IBlock * block)
+   {
+      OutflowD3Q27Object->run(block);
+      FixedDensityD3Q27Object->run(block);
+      FreeSlipD3Q27Object->run(block);
+      NoSlipD3Q27Object->run(block);
+      UBBD3Q27Object->run(block);
+      
+   }
+
+   void inner (IBlock * block)
+   {
+      OutflowD3Q27Object->inner(block);
+      FixedDensityD3Q27Object->inner(block);
+      FreeSlipD3Q27Object->inner(block);
+      NoSlipD3Q27Object->inner(block);
+      UBBD3Q27Object->inner(block);
+      
+   }
+
+   void outer (IBlock * block)
+   {
+      OutflowD3Q27Object->outer(block);
+      FixedDensityD3Q27Object->outer(block);
+      FreeSlipD3Q27Object->outer(block);
+      NoSlipD3Q27Object->outer(block);
+      UBBD3Q27Object->outer(block);
+      
+   }
+
+   void operator() (IBlock * block)
+   {
+      run(block);
+   }
+
+   std::function<void (IBlock *)> getSweep(Type type = Type::ALL)
+   {
+      switch (type)
+      {
+      case Type::INNER:
+         return [this](IBlock* block) { this->inner(block); };
+      case Type::OUTER:
+         return [this](IBlock* block) { this->outer(block); };
+      default:
+         return [this](IBlock* block) { this->run(block); };
+      }
+   }
+
+   weak_ptr< StructuredBlockStorage > blocks_;
+   BlockDataID flagID;
+   BlockDataID pdfsID;
+   walberla::FlagUID domainUID;
+
+   shared_ptr<lbm::OutflowD3Q27> OutflowD3Q27Object;
+   shared_ptr<lbm::FixedDensityD3Q27> FixedDensityD3Q27Object;
+   shared_ptr<lbm::FreeSlipD3Q27> FreeSlipD3Q27Object;
+   shared_ptr<lbm::NoSlipD3Q27> NoSlipD3Q27Object;
+   shared_ptr<lbm::UBBD3Q27> UBBD3Q27Object;
+   
+};
+
+}
+}
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e449704f5a0bfa4932344fef2a8cab378770592f
--- /dev/null
+++ b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
@@ -0,0 +1,141 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FixedDensityD3Q19.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "FixedDensityD3Q19.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_fixeddensityd3q19_even {
+static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
+{
+   
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   const double rho = density;
+   const double delta_rho = rho - 1.0;
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
+      const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3];
+      const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3];
+      const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+      const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
+      const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3];
+      const double u0Mu1 = u_0 + u_1*-1.0;
+      const double u0Pu1 = u_0 + u_1;
+      const double u1Pu2 = u_1 + u_2;
+      const double u1Mu2 = u_1 + u_2*-1.0;
+      const double u0Mu2 = u_0 + u_2*-1.0;
+      const double u0Pu2 = u_0 + u_2;
+      const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0))))))))))))))));
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void FixedDensityD3Q19::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   
+   auto pointer = indexVectors->pointerCpu(type);
+   
+
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+   uint8_t timestep = pdfs->getTimestep();
+   auto & density = density_;
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    if(((timestep & 1) ^ 1)) {
+        internal_fixeddensityd3q19_even::fixeddensityd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, density, indexVectorSize);
+    } else {
+        internal_fixeddensityd3q19_even::fixeddensityd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, density, indexVectorSize);
+    }
+}
+
+void FixedDensityD3Q19::run(IBlock * block)
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+void FixedDensityD3Q19::inner(IBlock * block)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+void FixedDensityD3Q19::outer(IBlock * block)
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.h b/src/lbm_generated/boundary/FixedDensityD3Q19.h
new file mode 100644
index 0000000000000000000000000000000000000000..b4575d189724633c503fc0ba94a004c5b07ef9c2
--- /dev/null
+++ b/src/lbm_generated/boundary/FixedDensityD3Q19.h
@@ -0,0 +1,509 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FixedDensityD3Q19.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class FixedDensityD3Q19
+{
+public:
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir;
+        }
+    };
+
+
+
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        void syncGPU()
+        {
+            
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        
+    };
+
+    FixedDensityD3Q19( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_, double density)
+        : pdfsID(pdfsID_), density_(density)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_FixedDensityD3Q19");
+    };
+
+    void run (IBlock * block);
+
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
private:
    // Dispatches the generated kernel on the index vector selected by 'type' (defined in the .cpp).
    void run_impl(IBlock * block, IndexVectors::Type type);

    // Block data ID of the per-block IndexVectors registered in the constructor.
    BlockDataID indexVectorID;

public:
    BlockDataID pdfsID;  // ID of the D3Q19 PDF field the boundary writes to
    double density_;     // prescribed boundary density
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ff43bc5efa34a0ba88e8205440f46e5fa6db94b
--- /dev/null
+++ b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
@@ -0,0 +1,140 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
//! \file FixedDensityD3Q27.cpp
//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "FixedDensityD3Q27.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
namespace internal_fixeddensityd3q27_even {
// Generated D3Q27 fixed-density boundary kernel (dispatched for both time step parities, see run_impl).
// For each index vector entry it reconstructs the cell's velocity from the current 27 PDFs,
// evaluates the equilibrium at the prescribed density and writes
//    f(inverse dir, neighbour cell) = 2 * f_eq - f(dir, cell)
// i.e. a fixed-density (anti-bounce-back style) update.
static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
{
   
   // Per-direction lookup tables: index of the inverse direction and the offsets of the
   // neighbouring cell in which the inverse-direction PDF is stored.
   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
   
   const double rho = density;
   const double delta_rho = rho - 1.0;
   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
   {
      // Each index vector entry is 16 bytes: four packed int32 values (x, y, z, dir).
      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
      // Velocity moments assembled from the 27 PDFs of the boundary-adjacent cell.
      const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
      const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3];
      const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3];
      const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
      const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
      const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3];
      const double u0Mu1 = u_0 + u_1*-1.0;
      const double u0Pu1 = u_0 + u_1;
      const double u1Pu2 = u_1 + u_2;
      const double u1Mu2 = u_1 + u_2*-1.0;
      const double u0Mu2 = u_0 + u_2*-1.0;
      const double u0Pu2 = u_0 + u_2;
      const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
      // Write 2*f_eq - f(dir) into the inverse-direction slot of the neighbour cell; the nested
      // conditional selects the direction-dependent equilibrium weight terms.
      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0)))))))))))))))))))))));
   }
}
}
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
// Runs the generated boundary kernel on the index vector selected by 'type' (ALL / INNER / OUTER).
void FixedDensityD3Q27::run_impl(IBlock * block, IndexVectors::Type type)
{
   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
   if( indexVectorSize == 0)
      return;

   
   auto pointer = indexVectors->pointerCpu(type);
   

   // The kernel reads the entries as raw bytes (four packed int32 values per entry).
   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);

   auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);

   uint8_t timestep = pdfs->getTimestep();
   auto & density = density_;
   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
    // NOTE(review): both timestep parities dispatch the same generated "even" kernel; this
    // appears intentional (the generator emitted a single kernel for this configuration) — confirm.
    if(((timestep & 1) ^ 1)) {
        internal_fixeddensityd3q27_even::fixeddensityd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, density, indexVectorSize);
    } else {
        internal_fixeddensityd3q27_even::fixeddensityd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, density, indexVectorSize);
    }
}
+
// Applies the boundary condition to every indexed link of the block.
void FixedDensityD3Q27::run(IBlock * block)
{
   run_impl(block, IndexVectors::ALL);
}

// Applies the boundary condition only to links in the block interior (communication hiding).
void FixedDensityD3Q27::inner(IBlock * block)
{
   run_impl(block, IndexVectors::INNER);
}

// Applies the boundary condition only to links near the block boundary (communication hiding).
void FixedDensityD3Q27::outer(IBlock * block)
{
   run_impl(block, IndexVectors::OUTER);
}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.h b/src/lbm_generated/boundary/FixedDensityD3Q27.h
new file mode 100644
index 0000000000000000000000000000000000000000..359540d25af2be0c78b85ad591c27aaba8d48de8
--- /dev/null
+++ b/src/lbm_generated/boundary/FixedDensityD3Q27.h
@@ -0,0 +1,645 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
//! \file FixedDensityD3Q27.h
//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class FixedDensityD3Q27
+{
+public:
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir;
+        }
+    };
+
+
+
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        void syncGPU()
+        {
+            
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        
+    };
+
+    FixedDensityD3Q27( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_, double density)
+        : pdfsID(pdfsID_), density_(density)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_FixedDensityD3Q27");
+    };
+
    /// Applies the boundary condition to all registered boundary links of the block
    /// (implementation generated in the corresponding .cpp file).
    void run (IBlock * block);

    /// Sweep interface: equivalent to run(block).
    void operator() (IBlock * block)
    {
        run(block);
    }

    /// Applies the boundary condition only to links in the block interior (INNER index vector).
    void inner (IBlock * block);

    /// Applies the boundary condition only to links near the block boundary (OUTER index vector).
    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    // Convenience overload: (re)build the boundary index vectors on every
+    // block of the given block forest.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto & block : *blocks )
+            fillFromFlagField<FlagField_T>( &block, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    // Rebuilds the boundary index vectors (ALL / INNER / OUTER) of one block:
+    // every domain-flagged cell that has a boundary-flagged neighbour in
+    // stencil direction d is recorded as IndexInfo(x, y, z, d).
+    //
+    // The 27 previously unrolled, copy-pasted scan loops (one per stencil
+    // direction, differing only in the neighbour offset) are collapsed into a
+    // single data-driven loop. The direction-major iteration order is kept,
+    // so the resulting index vectors are element-for-element identical to
+    // the unrolled version.
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+
+        // Nothing to collect unless both flags are registered on this block.
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        // "inner" = cells strictly inside the block; everything else belongs
+        // to the OUTER list (relevant for communication hiding).
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        // D3Q27 neighbour offsets, indexed by stencil direction 0..26.
+        // The table reproduces exactly the offsets of the former unrolled loops.
+        static const int32_t neighbourOffset[27][3] = {
+            { 0,  0,  0}, { 0,  1,  0}, { 0, -1,  0}, {-1,  0,  0}, { 1,  0,  0},
+            { 0,  0,  1}, { 0,  0, -1}, {-1,  1,  0}, { 1,  1,  0}, {-1, -1,  0},
+            { 1, -1,  0}, { 0,  1,  1}, { 0, -1,  1}, {-1,  0,  1}, { 1,  0,  1},
+            { 0,  1, -1}, { 0, -1, -1}, {-1,  0, -1}, { 1,  0, -1}, { 1,  1,  1},
+            {-1,  1,  1}, { 1, -1,  1}, {-1, -1,  1}, { 1,  1, -1}, {-1,  1, -1},
+            { 1, -1, -1}, {-1, -1, -1} };
+
+        // Direction-major scan: one full field sweep per direction, exactly
+        // as the unrolled code did, so the ordering of elements is preserved.
+        for( int32_t dir = 0; dir < 27; ++dir )
+        {
+           const cell_idx_t cx = cell_idx_c( neighbourOffset[dir][0] );
+           const cell_idx_t cy = cell_idx_c( neighbourOffset[dir][1] );
+           const cell_idx_t cz = cell_idx_c( neighbourOffset[dir][2] );
+
+           for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+           {
+              if( ! isFlagSet(it, domainFlag) )
+                 continue;
+
+              if( isFlagSet( it.neighbor(cx, cy, cz, 0), boundaryFlag ) )
+              {
+                 auto element = IndexInfo(it.x(), it.y(), it.z(), dir);
+
+                 indexVectorAll.push_back( element );
+                 if( inner.contains( it.x(), it.y(), it.z() ) )
+                    indexVectorInner.push_back( element );
+                 else
+                    indexVectorOuter.push_back( element );
+              }
+           }
+        }
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    // Runs the generated boundary kernel on the index list selected by 'type'.
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    // Id of the per-block IndexVectors storage registered in the constructor.
+    BlockDataID indexVectorID;
+    
+public:
+    // Id of the pdf field the boundary operates on.
+    BlockDataID pdfsID;
+    // Prescribed boundary density (public: read by the generated kernel setup).
+    double density_;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e3dc46580b5cbd0bdf533dd33742986ab13cd7f
--- /dev/null
+++ b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
@@ -0,0 +1,132 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FreeSlipD3Q19.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "FreeSlipD3Q19.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_freeslipd3q19_even {
+// Generated free-slip boundary kernel (D3Q19).
+//
+// _data_indexVector is a packed array of IndexInfo records; each record is
+// 8 x int32 = 32 bytes laid out as (x, y, z, dir, wnx, wny, wnz, ref_dir)
+// — see FreeSlipD3Q19::IndexInfo — and the fields are read below via raw
+// byte offsets (32*ctr_0 + 0/4/8/12/16/20/24/28).
+static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   
+   // Inverse direction index and the offsets of the cell a pdf in 'dir'
+   // streams in from (per D3Q19 stencil direction).
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   
+   
+   // Plain neighbour offsets of each stencil direction.
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      // Unpack one IndexInfo record: boundary-adjacent cell and direction.
+      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+      // Write the inverse-direction pdf at the in-streaming cell from the pdf
+      // found at the wall-normal neighbour (offsets 16/20/24 = wnx/wny/wnz)
+      // in the precomputed reflected direction (offset 28 = ref_dir).
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+// Runs the free-slip kernel on the index list selected by 'type'
+// (ALL for plain sweeps, INNER/OUTER for communication hiding).
+void FreeSlipD3Q19::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   // The generated even- and odd-timestep kernels are identical for this
+   // boundary, so the original code dispatched both parities of
+   // pdfs->getTimestep() to the same function. Call it unconditionally
+   // instead of keeping the cloned branch.
+   internal_freeslipd3q19_even::freeslipd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+// Apply the boundary to all recorded cells of the block.
+void FreeSlipD3Q19::run(IBlock * block)
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+// Apply only to cells strictly inside the block (overlaps with communication).
+void FreeSlipD3Q19::inner(IBlock * block)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+// Apply only to cells on the block's boundary layer.
+void FreeSlipD3Q19::outer(IBlock * block)
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.h b/src/lbm_generated/boundary/FreeSlipD3Q19.h
new file mode 100644
index 0000000000000000000000000000000000000000..4679ffc4ff0cbf7cc5bfb07d1a9f9d9a7e775e2e
--- /dev/null
+++ b/src/lbm_generated/boundary/FreeSlipD3Q19.h
@@ -0,0 +1,1101 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FreeSlipD3Q19.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class FreeSlipD3Q19
+{
+public:
+    // One boundary link: cell (x, y, z) and the stencil direction 'dir'
+    // pointing at the boundary. wnx/wny/wnz hold the discrete wall-normal
+    // and ref_dir the mirrored (reflected) stencil direction; both are
+    // value-initialized to zero here and filled in by fillFromFlagField.
+    // Layout note: 8 contiguous int32 fields (32 bytes) — the generated
+    // kernel reads these records via raw byte offsets.
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        int32_t wnx;
+        int32_t wny;
+        int32_t wnz;
+        int32_t ref_dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_), wnx(), wny(), wnz(), ref_dir() {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir && wnx == o.wnx && wny == o.wny && wnz == o.wnz && ref_dir == o.ref_dir;
+        }
+    };
+
+
+
+    // Per-block storage of the boundary index lists, one per sweep region
+    // (ALL / INNER / OUTER) to allow overlapping boundary handling with
+    // communication.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+
+        // Equality means all three index lists match.
+        bool operator==(IndexVectors const &other) const
+        {
+            return cpuVectors_ == other.cpuVectors_;
+        }
+
+        // Mutable access to the list of the given sweep region.
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+
+        // Raw pointer into the selected list, as consumed by the generated kernel.
+        IndexInfo * pointerCpu(Type t) { return cpuVectors_[t].data(); }
+
+        // No device-side copies exist in this CPU-only variant; nothing to sync.
+        void syncGPU() {}
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+    };
+
+    // Registers one IndexVectors container per block (block data name
+    // "IndexField_FreeSlipD3Q19") and stores the pdf field id.
+    FreeSlipD3Q19( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_FreeSlipD3Q19");
+    }  // note: removed the stray ';' that followed the constructor body
+
+    // Apply the boundary to all recorded cells of the block (defined in the .cpp).
+    void run (IBlock * block);
+
+    // Functor interface; forwards to run().
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    // Apply only to cells strictly inside the block (for communication hiding).
+    void inner (IBlock * block);
+
+    // Apply only to cells on the block boundary layer.
+    void outer (IBlock * block);
+
+    // Sweep functor over the full index list.
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    // Sweep functor restricted to the inner region.
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    // Sweep functor restricted to the outer region.
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    // Convenience overload: (re)build the boundary index vectors on every
+    // block of the given block forest.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto & block : *blocks )
+            fillFromFlagField<FlagField_T>( &block, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 0, 0);
+                int32_t ref_dir = 0; // dir: 0
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 0;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 1, 0);
+                int32_t ref_dir = 2; // dir: 1
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 1;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, -1, 0);
+                int32_t ref_dir = 1; // dir: 2
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 2;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(-1, 0, 0);
+                int32_t ref_dir = 4; // dir: 3
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 3;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(1, 0, 0);
+                int32_t ref_dir = 3; // dir: 4
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 4;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 0, 1);
+                int32_t ref_dir = 6; // dir: 5
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 5;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 0, -1);
+                int32_t ref_dir = 5; // dir: 6
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 6;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(-1, 1, 0);
+                int32_t ref_dir = 10; // dir: 7
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 7;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(1, 1, 0);
+                int32_t ref_dir = 9; // dir: 8
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 8;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(-1, -1, 0);
+                int32_t ref_dir = 8; // dir: 9
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 9;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(1, -1, 0);
+                int32_t ref_dir = 7; // dir: 10
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 10;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 1, 1);
+                int32_t ref_dir = 16; // dir: 11
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = -1;
+                   ref_dir = 11;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, -1, 1);
+                int32_t ref_dir = 15; // dir: 12
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = -1;
+                   ref_dir = 12;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(-1, 0, 1);
+                int32_t ref_dir = 18; // dir: 13
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 13;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(1, 0, 1);
+                int32_t ref_dir = 17; // dir: 14
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 14;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, 1, -1);
+                int32_t ref_dir = 12; // dir: 15
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = 1;
+                   ref_dir = 15;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(0, -1, -1);
+                int32_t ref_dir = 11; // dir: 16
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = 1;
+                   ref_dir = 16;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(-1, 0, -1);
+                int32_t ref_dir = 14; // dir: 17
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 17;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14 };
+                const Cell n = it.cell() + Cell(1, 0, -1);
+                int32_t ref_dir = 13; // dir: 18
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 18;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3364610eec662874c88832e7ebedd144755ccf1a
--- /dev/null
+++ b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
@@ -0,0 +1,132 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FreeSlipD3Q27.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "FreeSlipD3Q27.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_freeslipd3q27_even {
+// Generated free-slip boundary kernel for the D3Q27 stencil (even-timestep variant).
+//
+// _data_indexVector is a packed array of 32-byte records matching IndexInfo
+// (x, y, z, dir, wnx, wny, wnz, ref_dir as eight consecutive int32_t values);
+// byte offsets 0/4/8/12/16/20/24/28 below correspond to those fields.
+// _data_pdfs is the PDF field with the given x/y/z/f strides.
+static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   
+   // Inverse direction index and the cell offset of each D3Q27 direction
+   // (used to address the population that streams back along 'dir').
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   
+   
+   // Per-direction neighbour cell offsets of the D3Q27 stencil.
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      // Unpack the IndexInfo record: boundary-adjacent fluid cell and link direction.
+      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+      // Write the inverse-direction population of the neighbour cell from the
+      // population found by shifting with the stored wall normal
+      // (offsets 16/20/24: wnx/wny/wnz) in the reflected direction
+      // (offset 28: ref_dir) — i.e. a mirror reflection at the free-slip wall.
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+// Runs the free-slip kernel over the index vector of the requested region
+// (ALL / INNER / OUTER) on one block. No-op when the vector is empty.
+void FreeSlipD3Q27::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+
+   // The kernel consumes the vector as raw 32-byte IndexInfo records.
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    // NOTE(review): the original code branched on the PDF field's timestep
+    // parity, but BOTH branches invoked the identical even-parity kernel, so
+    // the branch was dead (bugprone-branch-clone). The parity check and the
+    // then-unused getTimestep() read (presumed a side-effect-free getter —
+    // confirm) have been removed.
+    internal_freeslipd3q27_even::freeslipd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+// Apply the free-slip boundary to every registered boundary link of the block.
+void FreeSlipD3Q27::run(IBlock * block) { run_impl(block, IndexVectors::ALL); }
+
+// Apply the boundary only to links whose cell lies strictly inside the block.
+void FreeSlipD3Q27::inner(IBlock * block) { run_impl(block, IndexVectors::INNER); }
+
+// Apply the boundary only to links whose cell lies on the outermost layer.
+void FreeSlipD3Q27::outer(IBlock * block) { run_impl(block, IndexVectors::OUTER); }
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.h b/src/lbm_generated/boundary/FreeSlipD3Q27.h
new file mode 100644
index 0000000000000000000000000000000000000000..562dfbcadd6e98f88ece133ab724080f3488b77e
--- /dev/null
+++ b/src/lbm_generated/boundary/FreeSlipD3Q27.h
@@ -0,0 +1,1485 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FreeSlipD3Q27.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class FreeSlipD3Q27
+{
+public:
+    // One boundary link: the boundary-adjacent fluid cell, the link direction,
+    // the discrete wall normal (wnx/wny/wnz), and the reflected direction used
+    // by the free-slip kernel.
+    //
+    // LAYOUT-SENSITIVE: the generated kernel reads these records through a raw
+    // uint8_t pointer at fixed 4-byte offsets (record stride 32 bytes), so the
+    // eight int32_t members must keep exactly this order and packing.
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        int32_t wnx;      // discrete wall-normal component, x (set by fillFromFlagField)
+        int32_t wny;      // discrete wall-normal component, y
+        int32_t wnz;      // discrete wall-normal component, z
+        int32_t ref_dir;  // mirrored stencil direction to copy the population from
+        // Normal and ref_dir are value-initialized to 0 and filled in later.
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_), wnx(), wny(), wnz(), ref_dir() {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir && wnx == o.wnx && wny == o.wny && wnz == o.wnz && ref_dir == o.ref_dir;
+        }
+    };
+
+
+
+    // Holds the per-block boundary-link lists, one vector per sweep region.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        // Region selector; the enumerator value doubles as the vector index.
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+
+        bool operator==(IndexVectors const &other) const { return vectors_ == other.vectors_; }
+
+        // Mutable access to the index vector of the given region.
+        CpuIndexVector & indexVector(Type t) { return vectors_[t]; }
+
+        // Raw pointer to the first IndexInfo record (consumed by the kernel).
+        IndexInfo * pointerCpu(Type t)  { return vectors_[t].data(); }
+
+        // Currently a no-op: there is no device copy to synchronize here.
+        void syncGPU() {}
+
+    private:
+        std::vector<CpuIndexVector> vectors_{NUM_TYPES};
+    };
+
+    // Stores the PDF field id and registers one IndexVectors container per
+    // block as structured block data.
+    FreeSlipD3Q27( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_FreeSlipD3Q27");
+    } // NOTE(review): stray ';' after the constructor body removed (-Wextra-semi)
+
+    void run (IBlock * block);
+
+    // Lets the boundary object be used directly as a sweep functor;
+    // equivalent to run(block).
+    void operator() (IBlock * block) { this->run(block); }
+
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    // Returns a callable applying the full (ALL) boundary sweep to a block.
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this](IBlock * b) { this->run(b); };
+    }
+
+    // Returns a callable applying the INNER-region boundary sweep to a block.
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this](IBlock * b) { this->inner(b); };
+    }
+
+    // Returns a callable applying the OUTER-region boundary sweep to a block.
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this](IBlock * b) { this->outer(b); };
+    }
+
+    // Rebuilds the boundary index vectors from the flag field on every block
+    // of the forest by delegating to the per-block overload.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto b = blocks->begin(); b != blocks->end(); ++b )
+        {
+            fillFromFlagField<FlagField_T>( &*b, flagFieldID, boundaryFlagUID, domainFlagUID );
+        }
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, 0, 0);
+                int32_t ref_dir = 0; // dir: 0
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 0;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, 1, 0);
+                int32_t ref_dir = 2; // dir: 1
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 1;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, -1, 0);
+                int32_t ref_dir = 1; // dir: 2
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 2;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, 0, 0);
+                int32_t ref_dir = 4; // dir: 3
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 3;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, 0, 0);
+                int32_t ref_dir = 3; // dir: 4
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = 0;
+                   ref_dir = 4;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, 0, 1);
+                int32_t ref_dir = 6; // dir: 5
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 5;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, 0, -1);
+                int32_t ref_dir = 5; // dir: 6
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 6;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, 1, 0);
+                int32_t ref_dir = 10; // dir: 7
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 7;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, 1, 0);
+                int32_t ref_dir = 9; // dir: 8
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = -1;
+                   element.wnz = 0;
+                   ref_dir = 8;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, -1, 0);
+                int32_t ref_dir = 8; // dir: 9
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 9;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, -1, 0);
+                int32_t ref_dir = 7; // dir: 10
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 0, domainFlag ) )
+                {
+                   element.wnz = 0;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 1;
+                   element.wnz = 0;
+                   ref_dir = 10;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, 1, 1);
+                int32_t ref_dir = 16; // dir: 11
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = -1;
+                   ref_dir = 11;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, -1, 1);
+                int32_t ref_dir = 15; // dir: 12
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = -1;
+                   ref_dir = 12;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, 0, 1);
+                int32_t ref_dir = 18; // dir: 13
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 13;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, 0, 1);
+                int32_t ref_dir = 17; // dir: 14
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = -1;
+                   ref_dir = 14;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, 1, -1);
+                int32_t ref_dir = 12; // dir: 15
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = -1;
+                   element.wnz = 1;
+                   ref_dir = 15;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(0, -1, -1);
+                int32_t ref_dir = 11; // dir: 16
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 0, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 0;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 0;
+                   element.wny = 1;
+                   element.wnz = 1;
+                   ref_dir = 16;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, 0, -1);
+                int32_t ref_dir = 14; // dir: 17
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 17;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, 0, -1);
+                int32_t ref_dir = 13; // dir: 18
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 0, n.z(), domainFlag ) )
+                {
+                   element.wny = 0;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 0;
+                   element.wnz = 1;
+                   ref_dir = 18;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  19 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, 1, 1);
+                int32_t ref_dir = 26; // dir: 19
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = -1;
+                   element.wnz = -1;
+                   ref_dir = 19;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  20 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, 1, 1);
+                int32_t ref_dir = 25; // dir: 20
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = -1;
+                   element.wnz = -1;
+                   ref_dir = 20;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  21 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, -1, 1);
+                int32_t ref_dir = 24; // dir: 21
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 1;
+                   element.wnz = -1;
+                   ref_dir = 21;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  22 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, -1, 1);
+                int32_t ref_dir = 23; // dir: 22
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + -1, domainFlag ) )
+                {
+                   element.wnz = -1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 1;
+                   element.wnz = -1;
+                   ref_dir = 22;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  23 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, 1, -1);
+                int32_t ref_dir = 22; // dir: 23
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = -1;
+                   element.wnz = 1;
+                   ref_dir = 23;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  24 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, 1, -1);
+                int32_t ref_dir = 21; // dir: 24
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + -1, n.z(), domainFlag ) )
+                {
+                   element.wny = -1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = -1;
+                   element.wnz = 1;
+                   ref_dir = 24;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  25 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(1, -1, -1);
+                int32_t ref_dir = 20; // dir: 25
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + -1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = -1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = -1;
+                   element.wny = 1;
+                   element.wnz = 1;
+                   ref_dir = 25;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  26 );
+              const int32_t x_axis_mirrored_stencil_dir [] = { 0,1,2,4,3,5,6,8,7,10,9,11,12,14,13,15,16,18,17,20,19,22,21,24,23,26,25 };
+                const int32_t y_axis_mirrored_stencil_dir [] = { 0,2,1,3,4,5,6,9,10,7,8,12,11,13,14,16,15,17,18,21,22,19,20,25,26,23,24 };
+                const int32_t z_axis_mirrored_stencil_dir [] = { 0,1,2,3,4,6,5,7,8,9,10,15,16,17,18,11,12,13,14,23,24,25,26,19,20,21,22 };
+                const Cell n = it.cell() + Cell(-1, -1, -1);
+                int32_t ref_dir = 19; // dir: 26
+                element.wnx = 0; // compute discrete normal vector of free slip wall
+                element.wny = 0;
+                if( flagField->isPartOfMaskSet( n.x() + 1, n.y(), n.z(), domainFlag ) )
+                {
+                   element.wnx = 1;
+                   ref_dir = x_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                if( flagField->isPartOfMaskSet( n.x(), n.y() + 1, n.z(), domainFlag ) )
+                {
+                   element.wny = 1;
+                   ref_dir = y_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                element.wnz = 0;
+                if( flagField->isPartOfMaskSet( n.x(), n.y(), n.z() + 1, domainFlag ) )
+                {
+                   element.wnz = 1;
+                   ref_dir = z_axis_mirrored_stencil_dir[ ref_dir ];
+                }
+                // concave corner (neighbors are non-fluid)
+                if( element.wnx == 0 && element.wny == 0 && element.wnz == 0 )
+                {
+                   element.wnx = 1;
+                   element.wny = 1;
+                   element.wnz = 1;
+                   ref_dir = 26;
+                }
+                element.ref_dir = ref_dir;
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.cpp b/src/lbm_generated/boundary/NoSlipD3Q19.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..268cbf43361645c8e7886f6abd86a56089a75fff
--- /dev/null
+++ b/src/lbm_generated/boundary/NoSlipD3Q19.cpp
@@ -0,0 +1,125 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlipD3Q19.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "NoSlipD3Q19.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_noslipd3q19_even {
+// Bounce-back no-slip sweep over an index vector of boundary links.
+// Each record in _data_indexVector is 16 bytes: four int32 values (x, y, z, dir)
+// identifying the fluid cell and the outgoing lattice direction. The population
+// streamed towards the wall is reflected back into the inverse direction of the
+// neighboring fluid cell.
+static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   // D3Q19 stencil tables: index of the inverse direction and the cell offsets
+   // of the neighbor the reflected population is written to.
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 };
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 };
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 };
+
+   for (int64_t linkIdx = 0; linkIdx < indexVectorSize; ++linkIdx)
+   {
+      // Decode one packed (x, y, z, dir) record.
+      const uint8_t * RESTRICT record = &_data_indexVector[16 * linkIdx];
+      const int32_t x   = *reinterpret_cast<const int32_t *>(record);
+      const int32_t y   = *reinterpret_cast<const int32_t *>(record + 4);
+      const int32_t z   = *reinterpret_cast<const int32_t *>(record + 8);
+      const int32_t dir = *reinterpret_cast<const int32_t *>(record + 12);
+
+      const int64_t srcOffset = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z
+                              + _stride_pdfs_3 * dir;
+      const int64_t dstOffset = _stride_pdfs_0 * (x + f_in_inv_offsets_x[dir])
+                              + _stride_pdfs_1 * (y + f_in_inv_offsets_y[dir])
+                              + _stride_pdfs_2 * (z + f_in_inv_offsets_z[dir])
+                              + _stride_pdfs_3 * f_in_inv_dir_idx[dir];
+      _data_pdfs[dstOffset] = _data_pdfs[srcOffset];
+   }
+}
+}
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void NoSlipD3Q19::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   // Fetch the index vector (ALL / INNER / OUTER) listing the boundary links
+   // this sweep has to process on the given block; nothing to do if empty.
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    // The generated even- and odd-timestep kernels are identical for this
+    // no-slip boundary, so the timestep-parity dispatch (a branch clone that
+    // also required reading pdfs->getTimestep()) is collapsed to a single call.
+    internal_noslipd3q19_even::noslipd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+void NoSlipD3Q19::run(IBlock * block)
+{
+   // Apply the no-slip boundary to every registered boundary link of the block.
+   run_impl(block, IndexVectors::ALL);
+}
+
+void NoSlipD3Q19::inner(IBlock * block)
+{
+   // Process only links in the block's inner region (the INNER index vector);
+   // used to overlap boundary handling with communication of the outer layer.
+   run_impl(block, IndexVectors::INNER);
+}
+
+void NoSlipD3Q19::outer(IBlock * block)
+{
+   // Process only links near the block boundary (the OUTER index vector),
+   // the complement of inner(); inner() + outer() together cover run().
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.h b/src/lbm_generated/boundary/NoSlipD3Q19.h
new file mode 100644
index 0000000000000000000000000000000000000000..933108eec5fdcdeee8e0af6abb90617fc149307e
--- /dev/null
+++ b/src/lbm_generated/boundary/NoSlipD3Q19.h
@@ -0,0 +1,508 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlipD3Q19.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+// Generated NoSlip (bounce-back) boundary handling for a D3Q19 pdf field.
+// Boundary links are collected per block from a flag field and replayed by
+// the generated kernel in NoSlipD3Q19.cpp.
+class NoSlipD3Q19
+{
+public:
+    // One boundary link: the cell coordinates (x, y, z) and the lattice
+    // direction index pointing towards the neighbouring boundary cell.
+    // The layout (four packed int32 values) must match the 16-byte records
+    // the generated kernel decodes from the raw index-vector bytes.
+    struct IndexInfo {
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir;
+        }
+    };
+
+    // Per-block storage of the boundary links, kept in three flavours so the
+    // sweep can be run on all links or restricted to inner/outer cells.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        // No-op in this CPU-only variant; kept so code generated for CPU and
+        // GPU targets presents a uniform interface.
+        void syncGPU()
+        {
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+    };
+
+    // Registers one IndexVectors instance per block as structured block data.
+    NoSlipD3Q19( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_NoSlipD3Q19");
+    };
+
+    // Applies the boundary condition to all recorded links of the block.
+    void run (IBlock * block);
+
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    // Variants restricted to the inner / outer index vectors, e.g. for
+    // overlapping boundary handling with communication.
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    // Fills the index vectors of every block of the forest from a flag field.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+    // Rebuilds the boundary-link lists of one block: for every domain-flagged
+    // cell with a boundary-flagged neighbour in a stencil direction, one
+    // (cell, direction) entry is recorded. Each entry additionally goes into
+    // either the INNER or the OUTER vector depending on whether the cell lies
+    // strictly inside the block (one cell away from its border).
+    // Does nothing if either flag is not registered on the field.
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        // D3Q19 neighbour offsets indexed by direction; must stay consistent
+        // with the f_in_inv_offsets_* tables of the generated kernel.
+        static const int32_t dirOffsets[19][3] = {
+            { 0, 0, 0}, { 0, 1, 0}, { 0,-1, 0}, {-1, 0, 0}, { 1, 0, 0},
+            { 0, 0, 1}, { 0, 0,-1}, {-1, 1, 0}, { 1, 1, 0}, {-1,-1, 0},
+            { 1,-1, 0}, { 0, 1, 1}, { 0,-1, 1}, {-1, 0, 1}, { 1, 0, 1},
+            { 0, 1,-1}, { 0,-1,-1}, {-1, 0,-1}, { 1, 0,-1} };
+
+        // One field sweep per direction instead of 19 unrolled copies of the
+        // same loop. Directions form the outer loop so the element ordering
+        // is identical to the fully unrolled version: all direction-0 links
+        // first, then direction 1, and so on.
+        for( int32_t dir = 0; dir < 19; ++dir )
+        {
+           const cell_idx_t cx = cell_idx_c( dirOffsets[dir][0] );
+           const cell_idx_t cy = cell_idx_c( dirOffsets[dir][1] );
+           const cell_idx_t cz = cell_idx_c( dirOffsets[dir][2] );
+
+           for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+           {
+              if( ! isFlagSet(it, domainFlag) )
+                 continue;
+
+              if( ! isFlagSet( it.neighbor(cx, cy, cz, 0), boundaryFlag ) )
+                 continue;
+
+              auto element = IndexInfo(it.x(), it.y(), it.z(), dir);
+
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.cpp b/src/lbm_generated/boundary/NoSlipD3Q27.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c38bee8122daa4ee1d09b1b861e5729d232bf310
--- /dev/null
+++ b/src/lbm_generated/boundary/NoSlipD3Q27.cpp
@@ -0,0 +1,126 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlipD3Q27.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "NoSlipD3Q27.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_noslipd3q27_even {
+// NoSlip (bounce-back) kernel for the D3Q27 stencil.
+//
+// _data_indexVector: packed boundary-link records, 16 bytes each — four
+//                    int32 values (x, y, z, dir).
+// _data_pdfs:        base pointer of the pdf field; _stride_pdfs_0..3 are
+//                    the x/y/z/f strides in elements.
+// indexVectorSize:   number of records to process.
+//
+// For each record, the pdf value stored at (x, y, z, dir) is copied into
+// the inverse-direction slot (f_in_inv_dir_idx[dir]) of the cell offset by
+// (f_in_inv_offsets_x/y/z)[dir] — i.e. the population streaming across the
+// boundary link is reflected back.
+static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   
+   // Per-direction inverse direction index and neighbour offsets for all
+   // 27 lattice directions.
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      // Decode one packed 16-byte record as four int32 fields. These casts
+      // rely on the -Wstrict-aliasing suppression active in this region.
+      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+// Runs the generated NoSlip kernel over the index vector selected by `type`
+// (ALL, INNER or OUTER). Returns early if no boundary links are recorded.
+void NoSlipD3Q27::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+
+   // The kernel consumes the index vector as raw bytes (16 bytes per entry:
+   // four packed int32 values x, y, z, dir).
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+
+   // Both arms of the former timestep-parity branch dispatched to the same
+   // kernel (NoSlip has no distinct odd-timestep variant), so the branch —
+   // and the timestep read feeding it — were dead and are collapsed here.
+   internal_noslipd3q27_even::noslipd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+// Applies the boundary condition to all recorded boundary links of the block.
+void NoSlipD3Q27::run(IBlock * block)
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+// Applies the boundary condition to the INNER index vector only (links whose
+// cell does not lie on the outermost cell layer of the block).
+void NoSlipD3Q27::inner(IBlock * block)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+// Applies the boundary condition to the OUTER index vector only (links whose
+// cell lies on the outermost cell layer of the block).
+void NoSlipD3Q27::outer(IBlock * block)
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.h b/src/lbm_generated/boundary/NoSlipD3Q27.h
new file mode 100644
index 0000000000000000000000000000000000000000..56bbfb0611d6a506b3ed4558c388b3d9ed65d443
--- /dev/null
+++ b/src/lbm_generated/boundary/NoSlipD3Q27.h
@@ -0,0 +1,644 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NoSlipD3Q27.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class NoSlipD3Q27
+{
+public:
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir;
+        }
+    };
+
+
+
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        void syncGPU()
+        {
+            
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        
+    };
+
+    NoSlipD3Q27( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_NoSlipD3Q27");
+    };
+
+    void run (IBlock * block);
+
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  19 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  20 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  21 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  22 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  23 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  24 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  25 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  26 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/OutflowD3Q19.cpp b/src/lbm_generated/boundary/OutflowD3Q19.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d42cf90429601d5ed4809c30b8926548d8bf6618
--- /dev/null
+++ b/src/lbm_generated/boundary/OutflowD3Q19.cpp
@@ -0,0 +1,136 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file OutflowD3Q19.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "OutflowD3Q19.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_outflowd3q19_even {
+// Generated outflow kernel for the even timestep of a D3Q19 lattice.
+// The index vector is a packed array of 32-byte records matching
+// OutflowD3Q19::IndexInfo: int32 x, y, z, dir at byte offsets 0/4/8/12,
+// followed by two doubles (pdf, pdf_nd) at offsets 16/24.
+// NOTE(review): the records are mutated through C-style casts that strip the
+// const from _data_indexVector — the parameter is only nominally const.
+static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+{
+   
+   // Inverse-direction index and neighbour offset tables for the D3Q19 stencil.
+   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   
+   
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   // One iteration per registered boundary link (cell + outgoing direction).
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      // Unpack one 32-byte IndexInfo record.
+      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+      // Blend the two stored pdf samples; the weights are 1 - 1/sqrt(3) and
+      // 1/sqrt(3) — presumably the generated outflow extrapolation weights.
+      const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
+      // Write the interpolated value into the inverse-direction pdf slot of the
+      // inward neighbour cell.
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter;
+      // Refresh the stored sample from one cell further inward along x
+      // (note the extra "- 1" on the x offset — assumes the outflow normal is
+      // +x; TODO confirm against the generator), then store the blended value
+      // as the new previous sample for the next timestep.
+      *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
+      *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+// Apply the outflow kernel to every boundary link of the requested sweep
+// region (ALL / INNER / OUTER) on the given block. No-op when the region has
+// no registered links.
+void OutflowD3Q19::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+
+   // The kernel reads/writes the index vector as packed 32-byte records.
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   // Fix: the original timestep-parity branch was a branch clone — both arms
+   // invoked the identical even-timestep kernel, so the parity test (and the
+   // getTimestep() read feeding it) was dead code. Call the kernel directly.
+   internal_outflowd3q19_even::outflowd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+}
+
+// Sweep entry point: apply the boundary to all registered links of the block.
+void OutflowD3Q19::run(IBlock * block)
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+// Apply the boundary only to links whose cell lies strictly inside the block
+// (used for communication hiding together with outer()).
+void OutflowD3Q19::inner(IBlock * block)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+// Apply the boundary only to links whose cell lies on the block's outermost
+// layer (the complement of inner()).
+void OutflowD3Q19::outer(IBlock * block)
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/OutflowD3Q19.h b/src/lbm_generated/boundary/OutflowD3Q19.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb2999966556997e70c9f469e65062951276a601
--- /dev/null
+++ b/src/lbm_generated/boundary/OutflowD3Q19.h
@@ -0,0 +1,277 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file OutflowD3Q19.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+//! Generated outflow boundary condition for a D3Q19 lattice.
+//!
+//! The boundary is link-based: fillFromFlagField() records one IndexInfo per
+//! (domain cell, direction) pair whose neighbour in a +x-facing direction is
+//! flagged as boundary. The sweeps run()/inner()/outer() (defined in the
+//! accompanying .cpp) then apply the generated outflow kernel to those links.
+class OutflowD3Q19
+{
+public:
+    //! One boundary link plus its two stored pdf samples.
+    //! The generated kernel reads this as a packed 32-byte record
+    //! (x,y,z,dir at byte offsets 0/4/8/12, pdf/pdf_nd at 16/24), so the
+    //! member order and types must not be changed.
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        double pdf;    // current stored pdf sample
+        double pdf_nd; // pdf sample of the previous timestep
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_), pdf(), pdf_nd() {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir && floatIsEqual(pdf, o.pdf) && floatIsEqual(pdf_nd, o.pdf_nd);
+        }
+    };
+
+    //! Per-block storage of the boundary links, split into ALL / INNER / OUTER
+    //! so the sweep can overlap computation with communication.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        //! No-op in this CPU-only variant; kept so callers are source-compatible
+        //! with the GPU-generated version.
+        void syncGPU()
+        {
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+    };
+
+    //! Registers one IndexVectors instance as block data on every block.
+    OutflowD3Q19( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_OutflowD3Q19");
+    }
+
+    void run (IBlock * block);
+
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    //! Convenience overload: (re)build the index vectors on every block.
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+    //! (Re)build this block's index vectors from the flag field: every domain
+    //! cell with a boundary neighbour in one of the handled +x-facing
+    //! directions contributes one link. pdf/pdf_nd of each link are seeded
+    //! from the pdf field so the outflow extrapolation has a valid history.
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        auto pdfs = block->getData< field::GhostLayerField<real_t, 19> >(pdfsID); 
+
+        // Nothing to do on blocks where either flag was never registered.
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        // Cells strictly inside the block go to INNER, the rest to OUTER.
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        // Fix: the five near-identical per-direction loops of the generated
+        // code are collapsed into one table-driven loop. The outer loop over
+        // the table preserves the original pass order, so the resulting
+        // vectors are element-for-element identical to before.
+        // Per handled direction (all with a +x component): neighbour offset
+        // (nx,ny,nz), stencil direction index dir, source-cell offset
+        // (ox,oy,oz) and pdf index f used to seed pdf / pdf_nd.
+        struct LinkSpec { cell_idx_t nx, ny, nz; int32_t dir; cell_idx_t ox, oy, oz; cell_idx_t f; };
+        static const LinkSpec links[] = {
+           { 1,  0,  0,  4,  0,  0,  0,  3 },
+           { 1,  1,  0,  8,  0,  1,  0,  9 },
+           { 1, -1,  0, 10,  0, -1,  0,  7 },
+           { 1,  0,  1, 14,  0,  0,  1, 17 },
+           { 1,  0, -1, 18,  0,  0, -1, 13 }
+        };
+
+        for( const auto & link : links )
+        {
+           for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+           {
+              if( ! isFlagSet(it, domainFlag) )
+                 continue;
+              if( ! isFlagSet( it.neighbor(link.nx, link.ny, link.nz, 0), boundaryFlag ) )
+                 continue;
+
+              auto element = IndexInfo(it.x(), it.y(), it.z(), link.dir);
+              // Seed current and previous sample with the same upstream pdf,
+              // exactly as the generated per-direction loops did.
+              element.pdf    = pdfs->get(it.x() + link.ox, it.y() + link.oy, it.z() + link.oz, link.f);
+              element.pdf_nd = element.pdf;
+
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+    // Fix: removed the unused member "BlockDataID pdfsCPUID" — it was never
+    // initialized or referenced anywhere in this header or the .cpp.
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/OutflowD3Q27.cpp b/src/lbm_generated/boundary/OutflowD3Q27.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ec9a490b443740ff1ae24adfa1a1739261311a2
--- /dev/null
+++ b/src/lbm_generated/boundary/OutflowD3Q27.cpp
@@ -0,0 +1,136 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file OutflowD3Q27.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "OutflowD3Q27.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_outflowd3q27_even {
+static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT  _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) // generated outflow kernel: one linear pass over the packed boundary-link list
+{
+   
+   const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; // D3Q27 inverse-direction lookup
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; // identical table, emitted twice by the code generator
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   
+   
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; // D3Q27 lattice direction offsets (same ordering as the index-vector 'dir' field)
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) // each packed entry is 32 bytes: int32 x, y, z, dir + double pdf (offset 16), pdf_nd (offset 24)
+   {
+      const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+      const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16])); // careful: '**' is multiply-then-pointer-dereference, not exponentiation; the blend weights are 1 - 1/sqrt(3) and 1/sqrt(3)
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter; // write blended value into the incoming (inverse-direction) pdf slot of the neighbouring cell
+      *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]]; // NOTE(review): the '- 1' shifts the x sample one cell inward -- presumably correct for a +x outflow normal; confirm against the generator
+      *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter; // carry this step's blend as next step's history (pdf_nd)
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void OutflowD3Q27::run_impl(IBlock * block, IndexVectors::Type type) // dispatches the generated kernel over the link list selected by 'type' (ALL/INNER/OUTER)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0) // no boundary links on this block -- nothing to do
+      return;
+
+   
+   auto pointer = indexVectors->pointerCpu(type);
+   
+
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer); // kernel reads the IndexInfo entries as raw 32-byte records
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+
+   uint8_t timestep = pdfs->getTimestep();
+   
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    if(((timestep & 1) ^ 1)) { // NOTE(review): both the even and the odd timestep branch invoke the same '..._even' kernel -- verify the generator intended no separate odd variant
+        internal_outflowd3q27_even::outflowd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+    } else {
+        internal_outflowd3q27_even::outflowd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
+    }
+}
+
+void OutflowD3Q27::run(IBlock * block) // apply the boundary to every registered link of the block
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+void OutflowD3Q27::inner(IBlock * block) // links strictly inside the block (for communication-hiding sweeps)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+void OutflowD3Q27::outer(IBlock * block) // links on the outermost cell layer of the block
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/OutflowD3Q27.h b/src/lbm_generated/boundary/OutflowD3Q27.h
new file mode 100644
index 0000000000000000000000000000000000000000..53b4e4bae5e6c6da6b4b108751120bf90a5ab25b
--- /dev/null
+++ b/src/lbm_generated/boundary/OutflowD3Q27.h
@@ -0,0 +1,349 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file OutflowD3Q27.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class OutflowD3Q27 // generated (pystencils) outflow boundary sweep for a D3Q27 pdf field, CPU variant
+{
+public:
+    struct IndexInfo { // one boundary link: cell (x,y,z), lattice direction, plus two doubles of pdf history used by the outflow extrapolation
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        double pdf;
+        double pdf_nd;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_), pdf(), pdf_nd() {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir && floatIsEqual(pdf, o.pdf) && floatIsEqual(pdf_nd, o.pdf_nd);
+        }
+    };
+
+
+
+    class IndexVectors // holds ALL/INNER/OUTER link lists; element layout must match the 32-byte records the generated kernel reads
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        void syncGPU() // no-op in this CPU-only variant
+        {
+            
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        
+    };
+
+    OutflowD3Q27( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_)
+        : pdfsID(pdfsID_)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_OutflowD3Q27");
+    };
+
+    void run (IBlock * block);
+
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID) // convenience overload: fills the index vectors of every block in the forest
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID ) // rebuilds the ALL/INNER/OUTER link lists from the flag field; one full-field scan per boundary direction
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        auto pdfs = block->getData< field::GhostLayerField<real_t, 27> >(pdfsID); 
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) )) // silently skip blocks that carry neither flag
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) ); // cells touching the block frontier go to OUTER, the shrunk interior to INNER
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) ) // link dir 4: boundary neighbour at (+1, 0, 0)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3); // seed both history slots from the opposite-direction pdf
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3); // pdf and pdf_nd start identical; the kernel then updates them each timestep
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) ) // link dir 8: boundary neighbour at (+1, +1, 0)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) ) // link dir 10: boundary neighbour at (+1, -1, 0)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) ) // link dir 14: boundary neighbour at (+1, 0, +1)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) ) // link dir 18: boundary neighbour at (+1, 0, -1)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) ) // link dir 19: boundary neighbour at (+1, +1, +1)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  19 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) ) // link dir 21: boundary neighbour at (+1, -1, +1)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  21 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) ) // link dir 23: boundary neighbour at (+1, +1, -1)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  23 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) ) // link dir 25: boundary neighbour at (+1, -1, -1)
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  25 );
+              element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20);
+                element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20);
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+    BlockDataID pdfsCPUID; // NOTE(review): never referenced in the visible code -- possibly a codegen leftover
+public:
+    BlockDataID pdfsID;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/UBBD3Q19.cpp b/src/lbm_generated/boundary/UBBD3Q19.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a88d2feeff0237881df80f6494a4f58f8936e02
--- /dev/null
+++ b/src/lbm_generated/boundary/UBBD3Q19.cpp
@@ -0,0 +1,136 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file UBBD3Q19.cpp
+//! \author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "UBBD3Q19.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_ubbd3q19_even {
+static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z) // generated UBB kernel: bounce-back with prescribed wall velocity (u_x, u_y, u_z)
+{
+   
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; // D3Q19 inverse-direction lookup
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   
+   const double weights [] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778}; // D3Q19 lattice weights (1/3, 1/18, 1/36)
+   
+   
+   
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; // lattice direction offsets, ordered like the index-vector 'dir' field
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) // each packed entry is 16 bytes: int32 x, y, z, dir
+   {
+      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; // f_inv = f_dir - 6 * w_dir * (c_dir . u): bounce-back with a wall-velocity momentum term
+   }
+}
+}
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void UBBD3Q19::run_impl(IBlock * block, IndexVectors::Type type) // dispatches the generated UBB kernel over the link list selected by 'type' (ALL/INNER/OUTER)
+{
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0) // no boundary links on this block -- nothing to do
+      return;
+
+   
+   auto pointer = indexVectors->pointerCpu(type);
+   
+
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer); // kernel reads the IndexInfo entries as raw 16-byte records
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+   uint8_t timestep = pdfs->getTimestep();
+   auto & u_y = u_y_;
+    auto & u_x = u_x_;
+    auto & u_z = u_z_;
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+    double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    if(((timestep & 1) ^ 1)) { // NOTE(review): both the even and the odd timestep branch invoke the same '..._even' kernel -- verify the generator intended no separate odd variant
+        internal_ubbd3q19_even::ubbd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize, u_x, u_y, u_z);
+    } else {
+        internal_ubbd3q19_even::ubbd3q19_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize, u_x, u_y, u_z);
+    }
+}
+
+void UBBD3Q19::run(IBlock * block) // apply the boundary to every registered link of the block
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+void UBBD3Q19::inner(IBlock * block) // links strictly inside the block (for communication-hiding sweeps)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+void UBBD3Q19::outer(IBlock * block) // links on the outermost cell layer of the block
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/UBBD3Q19.h b/src/lbm_generated/boundary/UBBD3Q19.h
new file mode 100644
index 0000000000000000000000000000000000000000..f57bac12d404b9b3d8819d7955dc65c3cdbcab61
--- /dev/null
+++ b/src/lbm_generated/boundary/UBBD3Q19.h
@@ -0,0 +1,511 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file UBBD3Q19.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class UBBD3Q19
+{
+public:
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir;
+        }
+    };
+
+
+
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        void syncGPU()
+        {
+            
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        
+    };
+
+    UBBD3Q19( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_, double u_x, double u_y, double u_z)
+        : pdfsID(pdfsID_), u_x_(u_x), u_y_(u_y), u_z_(u_z)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_UBBD3Q19");
+    };
+
+    void run (IBlock * block);
+
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    void inner (IBlock * block);
+
+    void outer (IBlock * block);
+
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
+            fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        // Rebuilds the per-block (cell, direction) index lists from the flag
+        // field: one entry is stored for every domain cell whose neighbour in
+        // a given stencil direction is a boundary cell.
+        //
+        // The original generated code repeated the identical scan loop once
+        // per direction (19 near-identical copies); it is consolidated here
+        // into a single loop over direction lookup tables. Directions are
+        // visited in ascending order with a full field sweep per direction,
+        // so the element order of the resulting index vectors is unchanged.
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+
+        // Nothing to do if the required flags were never registered on this block.
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        // Cells strictly inside the block go to INNER, the rest to OUTER.
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        // Stencil offsets indexed by direction (same values and order as the
+        // unrolled per-direction loops they replace).
+        static const cell_idx_t dirOffsetsX[19] = { 0, 0, 0,-1, 1, 0, 0,-1, 1,-1, 1, 0, 0,-1, 1, 0, 0,-1, 1 };
+        static const cell_idx_t dirOffsetsY[19] = { 0, 1,-1, 0, 0, 0, 0, 1, 1,-1,-1, 1,-1, 0, 0, 1,-1, 0, 0 };
+        static const cell_idx_t dirOffsetsZ[19] = { 0, 0, 0, 0, 0, 1,-1, 0, 0, 0, 0, 1, 1, 1, 1,-1,-1,-1,-1 };
+
+        for( int32_t dir = 0; dir < 19; ++dir )
+        {
+           for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+           {
+              if( ! isFlagSet(it, domainFlag) )
+                 continue;
+
+              if( isFlagSet( it.neighbor( dirOffsetsX[dir], dirOffsetsY[dir], dirOffsetsZ[dir], 0 ), boundaryFlag ) )
+              {
+                 auto element = IndexInfo( it.x(), it.y(), it.z(), dir );
+
+                 indexVectorAll.push_back( element );
+                 if( inner.contains( it.x(), it.y(), it.z() ) )
+                    indexVectorInner.push_back( element );
+                 else
+                    indexVectorOuter.push_back( element );
+              }
+           }
+        }
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    // Applies the boundary kernel to the index vector of the given type;
+    // defined in the generated .cpp file.
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    // Block data id of the per-block IndexVectors instance registered by the
+    // constructor.
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+    // Prescribed wall velocity components used by the boundary kernel.
+    double u_x_;
+    double u_y_;
+    double u_z_;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/UBBD3Q27.cpp b/src/lbm_generated/boundary/UBBD3Q27.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..08ee3ef38ef4460b590216b789caea5457da8b97
--- /dev/null
+++ b/src/lbm_generated/boundary/UBBD3Q27.cpp
@@ -0,0 +1,137 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file UBBD3Q27.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "UBBD3Q27.h"
+
+
+
+#define FUNC_PREFIX
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 177
+#else
+#pragma diag_suppress 177
+#endif
+#endif
+
+namespace internal_ubbd3q27_even {
+// Generated D3Q27 UBB (moving-wall bounce-back) kernel.
+//
+// For every 16-byte index-vector entry (x, y, z, dir packed as four int32
+// values), the pdf of the inverse direction stored in the neighbouring cell
+// is set to the pdf of direction `dir` at (x, y, z) minus the momentum
+// correction 6 * w[dir] * (u . c[dir]) for the wall velocity (u_x, u_y, u_z).
+static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT  _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
+{
+   
+   // Per-direction lookup tables: index of the inverse direction and the
+   // cell offsets at which that inverse pdf is stored.
+   const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; 
+   const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
+   const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   // Lattice weights of the D3Q27 stencil, indexed by direction.
+   const double weights [] = {0.29629629629629630, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296};
+   
+   
+   // Discrete lattice velocities c[dir] of the D3Q27 stencil.
+   const int32_t neighbour_offset_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; 
+   const int32_t neighbour_offset_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; 
+   const int32_t neighbour_offset_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; 
+   
+   for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      // Decode one IndexInfo entry: four consecutive int32 fields at a
+      // 16-byte stride (file-level pragmas suppress the aliasing warning).
+      const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+      const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+      const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+      const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+      // pdf(inv dir, neighbour cell) = pdf(dir, cell) - 6 * w[dir] * (u . c[dir])
+      _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
+   }
+}
+}
+
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
+
+void UBBD3Q27::run_impl(IBlock * block, IndexVectors::Type type)
+{
+   // Applies the UBB boundary kernel to every (cell, direction) link stored
+   // in the index vector of the requested type (ALL / INNER / OUTER).
+   auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
+   int32_t indexVectorSize = int32_c( indexVectors->indexVector(type).size() );
+   if( indexVectorSize == 0)
+      return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+   uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
+
+   auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+
+   auto & u_x = u_x_;
+   auto & u_y = u_y_;
+   auto & u_z = u_z_;
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   // The generated even- and odd-timestep branches invoked the identical
+   // kernel, so the timestep-parity check (and the now-unused timestep
+   // local) collapsed to a single unconditional call
+   // (fixes a bugprone-branch-clone finding).
+   internal_ubbd3q27_even::ubbd3q27_even(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize, u_x, u_y, u_z);
+}
+
+// Applies the boundary to all indexed links of the block.
+void UBBD3Q27::run(IBlock * block)
+{
+   run_impl(block, IndexVectors::ALL);
+}
+
+// Applies the boundary only to the INNER index vector (links whose cell lies
+// strictly inside the block), e.g. while communication is still in flight.
+void UBBD3Q27::inner(IBlock * block)
+{
+   run_impl(block, IndexVectors::INNER);
+}
+
+// Applies the boundary only to the OUTER index vector (links whose cell lies
+// on the block frontier), the complement of inner().
+void UBBD3Q27::outer(IBlock * block)
+{
+   run_impl(block, IndexVectors::OUTER);
+}
+
+} // namespace lbm
+} // namespace walberla
+
diff --git a/src/lbm_generated/boundary/UBBD3Q27.h b/src/lbm_generated/boundary/UBBD3Q27.h
new file mode 100644
index 0000000000000000000000000000000000000000..b7836d6958677e9b221f74f37b014b3de35019c7
--- /dev/null
+++ b/src/lbm_generated/boundary/UBBD3Q27.h
@@ -0,0 +1,647 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file UBBD3Q27.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+#include "field/GhostLayerField.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "blockforest/StructuredBlockForest.h"
+#include "field/FlagField.h"
+#include "core/debug/Debug.h"
+
+#include <set>
+#include <vector>
+
+
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class UBBD3Q27
+{
+public:
+    // One boundary link: cell coordinates plus the stencil direction index.
+    // The layout (four consecutive int32 fields, 16 bytes) is read back as
+    // raw bytes by the generated kernel — do not reorder or add members.
+    struct IndexInfo { 
+        int32_t x;
+        int32_t y;
+        int32_t z;
+        int32_t dir;
+        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
+        bool operator==(const IndexInfo & o) const {
+            return x == o.x && y == o.y && z == o.z && dir == o.dir;
+        }
+    };
+
+
+
+    // Per-block storage of the boundary links, kept in three lists: ALL, and
+    // its INNER/OUTER split used to overlap boundary handling with
+    // communication.
+    class IndexVectors
+    {
+    public:
+        using CpuIndexVector = std::vector<IndexInfo>;
+
+        enum Type {
+            ALL = 0,
+            INNER = 1,
+            OUTER = 2,
+            NUM_TYPES = 3
+        };
+
+        IndexVectors() = default;
+        bool operator==(IndexVectors const &other) const { return other.cpuVectors_ == cpuVectors_; }
+
+        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
+        // Raw pointer to the packed IndexInfo entries, consumed by the kernel.
+        IndexInfo * pointerCpu(Type t)  { return cpuVectors_[t].data(); }
+
+        // Intentionally empty here (no GPU mirror in this variant); kept so
+        // generated callers can always invoke it after rebuilding the lists.
+        void syncGPU()
+        {
+            
+        }
+
+    private:
+        std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
+
+        
+    };
+
+    // Registers a per-block IndexVectors instance with the block forest and
+    // stores the pdf field id and the prescribed wall velocity (u_x, u_y, u_z).
+    UBBD3Q27( const shared_ptr<StructuredBlockForest> & blocks,
+                   BlockDataID pdfsID_, double u_x, double u_y, double u_z)
+        : pdfsID(pdfsID_), u_x_(u_x), u_y_(u_y), u_z_(u_z)
+    {
+        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
+        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_UBBD3Q27");
+    };
+
+    // Applies the boundary to all indexed links of the block (ALL list).
+    void run (IBlock * block);
+
+    // Functor form of run(), so the object itself can be used as a sweep.
+    void operator() (IBlock * block)
+    {
+        run(block);
+    }
+
+    // Applies the boundary only to links strictly inside the block (INNER).
+    void inner (IBlock * block);
+
+    // Applies the boundary only to links on the block frontier (OUTER).
+    void outer (IBlock * block);
+
+    // Returns a callable wrapping run(); the lambda captures `this`, so the
+    // boundary object must outlive the returned sweep.
+    std::function<void (IBlock *)> getSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->run(b); };
+    }
+
+    // Returns a callable wrapping inner(); same lifetime caveat as getSweep().
+    std::function<void (IBlock *)> getInnerSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->inner(b); };
+    }
+
+    // Returns a callable wrapping outer(); same lifetime caveat as getSweep().
+    std::function<void (IBlock *)> getOuterSweep()
+    {
+        return [this]
+               (IBlock * b)
+               { this->outer(b); };
+    }
+
+    template<typename FlagField_T>
+    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
+    {
+        // Delegate to the per-block overload for every block of the forest.
+        for( auto & block : *blocks )
+            fillFromFlagField<FlagField_T>( &block, flagFieldID, boundaryFlagUID, domainFlagUID );
+    }
+
+
+    template<typename FlagField_T>
+    void fillFromFlagField(IBlock * block, ConstBlockDataID flagFieldID,
+                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
+    {
+        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
+        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
+        
+
+        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
+            return;
+
+        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
+        auto domainFlag = flagField->getFlag(domainFlagUID);
+
+        auto inner = flagField->xyzSize();
+        inner.expand( cell_idx_t(-1) );
+
+        indexVectorAll.clear();
+        indexVectorInner.clear();
+        indexVectorOuter.clear();
+
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  19 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  20 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  21 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, 1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  22 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  23 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, 1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  24 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  25 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        for( auto it = flagField->beginWithGhostLayerXYZ( cell_idx_c( flagField->nrOfGhostLayers() - 1 ) ); it != flagField->end(); ++it )
+        {
+           if( ! isFlagSet(it, domainFlag) )
+              continue;
+
+           if ( isFlagSet( it.neighbor(-1, -1, -1 , 0 ), boundaryFlag ) )
+           {
+              auto element = IndexInfo(it.x(), it.y(),  it.z(),  26 );
+              
+              indexVectorAll.push_back( element );
+              if( inner.contains( it.x(), it.y(), it.z() ) )
+                 indexVectorInner.push_back( element );
+              else
+                 indexVectorOuter.push_back( element );
+           }
+        }
+        
+        
+        
+
+        indexVectors->syncGPU();
+    }
+
+private:
+    void run_impl(IBlock * block, IndexVectors::Type type);
+
+    BlockDataID indexVectorID;
+    
+public:
+    BlockDataID pdfsID;
+    double u_x_;
+    double u_y_;
+    double u_z_;
+};
+
+
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/boundary_generation_script.py b/src/lbm_generated/boundary/boundary_generation_script.py
new file mode 100644
index 0000000000000000000000000000000000000000..970daedc56e562b2aba05fa6e9147ad7c889cda0
--- /dev/null
+++ b/src/lbm_generated/boundary/boundary_generation_script.py
@@ -0,0 +1,55 @@
+import sympy as sp
+
+from pystencils import Target
+
+from lbmpy.creationfunctions import create_lb_method
+from lbmpy import LBMConfig, Stencil, Method, LBStencil
+from lbmpy.boundaries import ExtrapolationOutflow, FixedDensity, FreeSlip, NoSlip, UBB
+
+from pystencils_walberla import ManualCodeGenerationContext, generate_info_header
+from lbmpy_walberla.boundary_collection import generate_boundary_collection
+from lbmpy_walberla import lbm_boundary_generator
+
+with ManualCodeGenerationContext(openmp=False, optimize_for_localhost=False,
+                                 mpi=True, double_accuracy=True, cuda=False) as ctx:
+
+    for stencil in [LBStencil(Stencil.D3Q19), LBStencil(Stencil.D3Q27)]:
+        target = Target.GPU if ctx.cuda else Target.CPU
+        data_type = "float64" if ctx.double_accuracy else "float32"
+
+        method = Method.SRT
+        relaxation_rate = sp.symbols("omega")
+        streaming_pattern = 'pull'
+
+        lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate,
+                               streaming_pattern=streaming_pattern)
+
+        lb_method = create_lb_method(lbm_config=lbm_config)
+
+        outflow_west_boundary = ExtrapolationOutflow(normal_direction=(1, 0, 0), lb_method=lb_method)
+        fixed_density_boundary = FixedDensity(density=sp.Symbol("density"))
+        free_slip_boundary = FreeSlip(stencil)
+        no_slip_boundary = NoSlip()
+        ubb_boundary = UBB(sp.symbols("u_x, u_y, u_z"), data_type=data_type)
+
+        outflow = lbm_boundary_generator(class_name=f'Outflow{stencil.name}', flag_uid='Outflow',
+                                         boundary_object=outflow_west_boundary)
+
+        fixed_density = lbm_boundary_generator(class_name=f'FixedDensity{stencil.name}', flag_uid='FixedDensity',
+                                               boundary_object=fixed_density_boundary)
+
+        free_slip = lbm_boundary_generator(class_name=f'FreeSlip{stencil.name}', flag_uid='FreeSlip',
+                                           boundary_object=free_slip_boundary)
+
+        no_slip = lbm_boundary_generator(class_name=f'NoSlip{stencil.name}', flag_uid='NoSlip',
+                                         boundary_object=no_slip_boundary)
+
+        ubb = lbm_boundary_generator(class_name=f'UBB{stencil.name}', flag_uid='UBB',
+                                     boundary_object=ubb_boundary)
+
+        boundaries = [outflow, fixed_density, free_slip, no_slip, ubb]
+        generate_boundary_collection(ctx, f'{stencil.name}BoundaryCollection', boundary_generators=boundaries,
+                                     lb_method=lb_method, streaming_pattern=streaming_pattern,
+                                     target=target)
+
+        ctx.write_all_files()
diff --git a/src/lbm_generated/communication/CMakeLists.txt b/src/lbm_generated/communication/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cd5516b9e96a757f9d8911269d7f703b13e92105
--- /dev/null
+++ b/src/lbm_generated/communication/CMakeLists.txt
@@ -0,0 +1,9 @@
+target_sources( lbm_generated
+    PRIVATE
+    CombinedInPlacePackInfo.h
+    NonuniformCommData.h
+    NonuniformCommData.impl.h
+    NonuniformGeneratedPdfPackInfo.h
+    NonuniformGeneratedPdfPackInfo.impl.h
+    UniformGeneratedPdfPackInfo.h
+    )
diff --git a/src/lbm_generated/communication/CombinedInPlacePackInfo.h b/src/lbm_generated/communication/CombinedInPlacePackInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..b5a4c0ba2fd0fc6a9b2816ecbe38cff2af1dd150
--- /dev/null
+++ b/src/lbm_generated/communication/CombinedInPlacePackInfo.h
@@ -0,0 +1,117 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CombinedInPlacePackInfo.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+#include "communication/UniformPackInfo.h"
+
+namespace walberla::lbm_generated {
+
+template< typename LatticeStorageSpecification_T, typename EvenPackInfo, typename OddPackInfo >
+class CombinedInPlaceCpuPackInfo : public ::walberla::communication::UniformPackInfo
+{
+ public:
+   template< typename... Args >
+   CombinedInPlaceCpuPackInfo(std::shared_ptr< LatticeStorageSpecification_T >& storageSecification, Args&&... args)
+      : storageSecification_(storageSecification), evenPackInfo_(std::forward< Args >(args)...), oddPackInfo_(std::forward< Args >(args)...)
+   {}
+
+   ~CombinedInPlaceCpuPackInfo() override = default;
+   bool constantDataExchange() const override { return true; }
+   bool threadsafeReceiving() const override { return true; }
+
+   void unpackData(IBlock* receiver, stencil::Direction dir, mpi::RecvBuffer& buffer) override
+   {
+      if (storageSecification_->isEvenTimeStep())
+      {
+         return evenPackInfo_.unpackData(receiver, dir, buffer);
+      }
+      else
+      {
+         return oddPackInfo_.unpackData(receiver, dir, buffer);
+      }
+   }
+
+   void communicateLocal(const IBlock* sender, IBlock* receiver, stencil::Direction dir) override
+   {
+      if (storageSecification_->isEvenTimeStep())
+      {
+         return evenPackInfo_.communicateLocal(sender, receiver, dir);
+      }
+      else
+      {
+         return oddPackInfo_.communicateLocal(sender, receiver, dir);
+      }
+   }
+
+   void packDataImpl(const IBlock* sender, stencil::Direction dir, mpi::SendBuffer& outBuffer) const override
+   {
+      if (storageSecification_->isEvenTimeStep())
+      {
+         return evenPackInfo_.packDataImpl(sender, dir, outBuffer);
+      }
+      else
+      {
+         return oddPackInfo_.packDataImpl(sender, dir, outBuffer);
+      }
+   }
+
+   void pack(stencil::Direction dir, unsigned char* buffer, IBlock* block) const
+   {
+      if (storageSecification_->isEvenTimeStep())
+      {
+         evenPackInfo_.pack(dir, buffer, block);
+      }
+      else
+      {
+         oddPackInfo_.pack(dir, buffer, block);
+      }
+   }
+
+   void unpack(stencil::Direction dir, unsigned char* buffer, IBlock* block) const
+   {
+      if (storageSecification_->isEvenTimeStep())
+      {
+         evenPackInfo_.unpack(dir, buffer, block);
+      }
+      else
+      {
+         oddPackInfo_.unpack(dir, buffer, block);
+      }
+   }
+
+   uint_t size(stencil::Direction dir, IBlock* block) const
+   {
+      if (storageSecification_->isEvenTimeStep())
+      {
+         return evenPackInfo_.size(dir, block);
+      }
+      else
+      {
+         return oddPackInfo_.size(dir, block);
+      }
+   }
+
+ private:
+   const std::shared_ptr< LatticeStorageSpecification_T >& storageSecification_;
+   EvenPackInfo evenPackInfo_;
+   OddPackInfo oddPackInfo_;
+};
+
+} // namespace walberla::lbm_generated
diff --git a/src/lbm_generated/communication/NonuniformCommData.h b/src/lbm_generated/communication/NonuniformCommData.h
new file mode 100644
index 0000000000000000000000000000000000000000..762dde86c5cf2a8336e3791dd3b56274b5f26df3
--- /dev/null
+++ b/src/lbm_generated/communication/NonuniformCommData.h
@@ -0,0 +1,136 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformCommData.h
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/StructuredBlockForest.h"
+#include "blockforest/BlockDataHandling.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagField.h"
+
+#include "lbm_generated/field/PdfField.h"
+
+#include "stencil/Directions.h"
+
+#define USE_CELL_INTERVALS
+
+namespace walberla::lbm_generated {
+
+using PartialCoalescenceMaskField = FlagField< uint32_t >;
+
+namespace util {
+   void forEachSubdirection(const Vector3< cell_idx_t > mainDirection, const std::function< void(Vector3< cell_idx_t >) >& func);
+   bool forEachSubdirectionCancel(const Vector3< cell_idx_t > mainDirection,
+                               const std::function< bool(Vector3< cell_idx_t >) >& func);
+   void getSubdirections(const Vector3< cell_idx_t > mainDirection, std::vector< Vector3< cell_idx_t > > subdirs);
+
+   template< typename Stencil_T >
+   void forEachOrthogonalDirection(Vector3<cell_idx_t> d, std::function< void(Vector3< cell_idx_t >) > func);
+} // namespace util
+
+template< typename LatticeStorageSpecification_T >
+class NonuniformCommData
+{
+ private:
+   void registerFlags();
+   void computeBitMask();
+
+ public:
+   using Stencil              = typename LatticeStorageSpecification_T::Stencil;
+   using CommunicationStencil = typename LatticeStorageSpecification_T::CommunicationStencil;
+
+#if defined(USE_CELL_INTERVALS)
+   NonuniformCommData(IBlock* const block, uint_t xSize, uint_t ySize, uint_t zSize)
+      : block_(block), maskField_(xSize, ySize, zSize, 2),
+        interiorInterval(0, 0, 0, cell_idx_c(xSize) - 1, cell_idx_c(ySize) - 1, cell_idx_c(zSize) - 1)
+   {
+      registerFlags();
+      computeBitMask();
+   };
+#else
+   NonuniformCommData(IBlock* const block, const BlockDataID pdfFieldID, uint_t xSize, uint_t ySize, uint_t zSize)
+      : block_(block), pdfFieldID_(pdfFieldID), maskField_(xSize, ySize, zSize, 2)
+   {
+      registerFlags();
+      computeBitMask();
+   };
+#endif
+
+   bool operator==(const NonuniformCommData& other) { return this == &other; }
+   bool operator!=(const NonuniformCommData& other) { return this != &other; }
+
+   PartialCoalescenceMaskField& getMaskField() { return maskField_; }
+   const PartialCoalescenceMaskField& getMaskField() const { return maskField_; }
+
+ private:
+#if defined(USE_CELL_INTERVALS)
+   void prepareIntervals();
+   void setFlagOnInterval(const CellInterval & ci, const uint_t fIdx);
+#else
+   void prepareFlags();
+   void resetCornerSkippingOriginFlags();
+#endif
+
+   void setupCornerSkippingOrigins(stencil::Direction commDir);
+   void setupBitMaskSlice(stencil::Direction commDir, stencil::Direction streamDir);
+
+   bool haveSmallestIdInIntersection(Vector3<cell_idx_t> cornerDir);
+
+   const IBlock* const block_;
+   PartialCoalescenceMaskField maskField_;
+
+#if defined(USE_CELL_INTERVALS)
+   const CellInterval interiorInterval;
+   std::vector< CellInterval > passThroughIntervals_;
+   std::vector< CellInterval > cornerSkippingOriginIntervals_;
+#endif
+};
+
+
+template< typename LatticeStorageSpecification_T >
+class NonuniformCommDataHandling
+   : public blockforest::AlwaysInitializeBlockDataHandling< NonuniformCommData< LatticeStorageSpecification_T > >
+{
+ public:
+   using CommmData_T = NonuniformCommData< LatticeStorageSpecification_T >;
+
+   NonuniformCommDataHandling(const weak_ptr< StructuredBlockForest >& blocks)
+      : blocks_(blocks){};
+
+   CommmData_T* initialize(IBlock* const block) override
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR(block)
+      auto blocks = blocks_.lock();
+      WALBERLA_CHECK_NOT_NULLPTR(blocks)
+
+      return new CommmData_T(block, blocks->getNumberOfXCells(*block), blocks->getNumberOfYCells(*block),
+                             blocks->getNumberOfZCells(*block));
+   }
+
+ private:
+   const weak_ptr< StructuredBlockStorage > blocks_;
+};
+
+} // walberla::lbm_generated
+
+#include "lbm_generated/communication/NonuniformCommData.impl.h"
diff --git a/src/lbm_generated/communication/NonuniformCommData.impl.h b/src/lbm_generated/communication/NonuniformCommData.impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a4bc3293087a5ed1e0c1aef261381511d908371
--- /dev/null
+++ b/src/lbm_generated/communication/NonuniformCommData.impl.h
@@ -0,0 +1,400 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformCommData.impl.h
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/all.h"
+
+#include "lbm_generated/communication/NonuniformCommData.h"
+
+#include "stencil/Directions.h"
+
+#define IDX_FLAG(d) (1 << d)
+
+#if !defined(USE_CELL_INTERVALS)
+#define INTERIOR_FLAG_BIT 29
+#define INTERIOR_FLAG (1 << INTERIOR_FLAG_BIT)
+
+#define PASS_THROUGH_FLAG_BIT 30
+#define PASS_THROUGH_FLAG (1 << PASS_THROUGH_FLAG_BIT)
+
+#define CORNER_SKIPPING_ORIGIN_FLAG_BIT 31
+#define CORNER_SKIPPING_ORIGIN_FLAG (1 << CORNER_SKIPPING_ORIGIN_FLAG_BIT)
+#endif
+
+using namespace walberla::lbm_generated::util;
+
+namespace walberla::lbm_generated {
+namespace util {
+
+/***********************************************************************************************************************
+ *                                    Utility Functions for handling directions                                        *
+ **********************************************************************************************************************/
+
+/**
+ * Iterates all sub-directions of a given direction vector and runs a callback on each of them.
+ * Subdirections are any nonzero directions obtained by truncating zero or more components of a direction
+ * vector to zero. The direction vector itself is contained in this set.
+ * @param mainDirection The direction whose subdirections will be iterated
+ * @param func          The callback that should be run for each subdirection
+ */
+inline void forEachSubdirection(const Vector3< cell_idx_t > mainDirection,
+                                const std::function< void(Vector3< cell_idx_t >) >& func)
+{
+   for (cell_idx_t z = std::min(0, mainDirection[2]); z <= std::max(0, mainDirection[2]); z++)
+   {
+      for (cell_idx_t y = std::min(0, mainDirection[1]); y <= std::max(0, mainDirection[1]); y++)
+      {
+         for (cell_idx_t x = std::min(0, mainDirection[0]); x <= std::max(0, mainDirection[0]); x++)
+         {
+            if (x == 0 && y == 0 && z == 0) continue;
+            func(Vector3< cell_idx_t >(x, y, z));
+         }
+      }
+   }
+}
+
+/**
+ * Iterates all sub-directions of a given direction vector and runs a callback on each of them.
+ * Subdirections are any nonzero directions obtained by truncating zero or more components of a direction
+ * vector to zero. The direction vector itself is contained in this set.
+ * @param mainDirection The direction whose subdirections will be iterated
+ * @param func          The callback that should be run for each subdirection. If the callback returns false, the
+ *                      iteration will be stopped.
+ * @return true if the iteration completed, false if it was canceled
+ */
+inline bool forEachSubdirectionCancel(const Vector3< cell_idx_t > mainDirection,
+                                      const std::function< bool(Vector3< cell_idx_t >) >& func)
+{
+   for (cell_idx_t z = std::min(0, mainDirection[2]); z <= std::max(0, mainDirection[2]); z++)
+   {
+      for (cell_idx_t y = std::min(0, mainDirection[1]); y <= std::max(0, mainDirection[1]); y++)
+      {
+         for (cell_idx_t x = std::min(0, mainDirection[0]); x <= std::max(0, mainDirection[0]); x++)
+         {
+            if (x == 0 && y == 0 && z == 0) continue;
+            if (!func(Vector3< cell_idx_t >(x, y, z))) return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+inline void getSubdirections(const Vector3< cell_idx_t > mainDirection,
+                             std::vector< Vector3< cell_idx_t > > subdirections)
+{
+   forEachSubdirection(mainDirection, [&](Vector3< cell_idx_t > v) { subdirections.push_back(v); });
+}
+
+/**
+ * Iterates all directions orthogonal to d that are part of the given stencil, and executes a function on
+ * each of them.
+ * @tparam Stencil_T The underlying stencil
+ * @param d
+ * @param func
+ */
+template< typename Stencil_T >
+inline void forEachOrthogonalDirection(Vector3< cell_idx_t > d, std::function< void(Vector3< cell_idx_t >) > func)
+{
+   for (cell_idx_t x = (d[0] == 0 ? -1 : 0); x <= (d[0] == 0 ? 1 : 0); x++)
+      for (cell_idx_t y = (d[1] == 0 ? -1 : 0); y <= (d[1] == 0 ? 1 : 0); y++)
+         for (cell_idx_t z = (d[2] == 0 ? -1 : 0); z <= (d[2] == 0 ? 1 : 0); z++)
+         {
+            if (x == 0 && y == 0 && z == 0) continue;
+            if (Stencil_T::containsDir(stencil::vectorToDirection(x, y, z))) { func(Vector3(x, y, z)); }
+         }
+}
+
+} // namespace util
+
+/***********************************************************************************************************************
+ *                                               Bit Mask Computation                                                  *
+ **********************************************************************************************************************/
+
+template< typename LatticeStorageSpecification_T >
+void NonuniformCommData< LatticeStorageSpecification_T >::registerFlags()
+{
+#if !defined(USE_CELL_INTERVALS)
+   maskField_.registerFlag(FlagUID(true), INTERIOR_FLAG_BIT);
+   maskField_.registerFlag(FlagUID(true), PASS_THROUGH_FLAG_BIT);
+   maskField_.registerFlag(FlagUID(true), CORNER_SKIPPING_ORIGIN_FLAG_BIT);
+#endif
+
+   for(auto it = Stencil::beginNoCenter(); it != Stencil::end(); ++it){
+      maskField_.registerFlag(FlagUID(true), Stencil::idx[*it]);
+   }
+}
+
+#if defined(USE_CELL_INTERVALS)
+
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformCommData< LatticeStorageSpecification_T >::prepareIntervals()
+{
+   passThroughIntervals_.clear();
+   const Block * b = dynamic_cast< const Block * >(block_);
+
+   for(auto commDir = CommunicationStencil::beginNoCenter(); commDir != CommunicationStencil::end(); ++commDir){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*commDir);
+      if(!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx)){
+         CellInterval ci;
+         maskField_.getGhostRegion(*commDir, ci, 2);
+         passThroughIntervals_.push_back(ci);
+      }
+   }
+}
+
+/**
+ * Sets the mask bit of the given stencil-direction index on every cell of the
+ * given interval.
+ *
+ * @param ci    Cell interval to mark
+ * @param fIdx  Stencil index of the direction whose flag bit is set
+ */
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformCommData< LatticeStorageSpecification_T >::setFlagOnInterval(const CellInterval & ci,
+                                                                                   const uint_t fIdx)
+{
+   for(auto c : ci){
+      maskField_.addFlag(c, IDX_FLAG(fIdx));
+   }
+}
+
+#else
+
+/**
+ * Prepares the INTERIOR and PASS_THROUGH flags.
+ * Sets the domain interior to INTERIOR. Sets any ghost layers corresponding to a coarse block
+ * or no block to PASS_THROUGH.
+ */
+template< typename LatticeStorageSpecification_T >
+void NonuniformCommData< LatticeStorageSpecification_T >::prepareFlags()
+{
+   // NOTE(review): dynamic_cast result is used without a null check; block_ is
+   // assumed to be a blockforest Block.
+   const Block * b = dynamic_cast< const Block * >(block_);
+
+   // Set interior to origin: every non-ghost cell is INTERIOR.
+   for (auto it = maskField_.beginXYZ(); it != maskField_.end(); ++it)
+   {
+      maskField_.addFlag(it.cell(), INTERIOR_FLAG);
+   }
+
+   // Set GLs to pass-through: any direction without an equally sized neighbor
+   // borders a coarse block or no block (finer is excluded by 2:1 balance).
+   for(auto commDir = CommunicationStencil::beginNoCenter(); commDir != CommunicationStencil::end(); ++commDir){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*commDir);
+      if(!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx)){
+         // Mark both ghost layers of that direction.
+         for(auto it = maskField_.beginGhostLayerOnlyXYZ(2, *commDir); it != maskField_.end(); ++it){
+            maskField_.addFlag(it.cell(), PASS_THROUGH_FLAG);
+         }
+      }
+   }
+}
+
+/**
+ * Resets the origin flag on any ghost layers.
+ */
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformCommData< LatticeStorageSpecification_T >::resetCornerSkippingOriginFlags()
+{
+   const Block * b = dynamic_cast< const Block * >(block_);
+
+   // Remove origin flag from any ghost layers. Only directions bordering a
+   // coarse block or no block can carry the flag (see setupCornerSkippingOrigins),
+   // so only those ghost regions need to be cleared.
+   for(auto commDir = CommunicationStencil::beginNoCenter(); commDir != CommunicationStencil::end(); ++commDir){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*commDir);
+      if(!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx)){
+         for(auto it = maskField_.beginGhostLayerOnlyXYZ(2, *commDir); it != maskField_.end(); ++it){
+            maskField_.removeFlag(it.cell(), CORNER_SKIPPING_ORIGIN_FLAG);
+         }
+      }
+   }
+}
+
+#endif
+
+
+/**
+ * Determines whether the current block has the smallest BlockID among all fine blocks of a
+ * given intersection volume.
+ *
+ * Iterates all subdirections of cornerDir; for every one that holds an equally
+ * sized (i.e. fine) neighbor block, compares that neighbor's ID against this
+ * block's ID. Cancels as soon as a smaller ID is found.
+ *
+ * @tparam LatticeStorageSpecification_T storage specification of the lattice model
+ * @param cornerDir direction vector identifying the corner intersection volume
+ * @return true if no equally sized neighbor in any subdirection has a smaller BlockID
+ */
+template< typename LatticeStorageSpecification_T >
+inline bool NonuniformCommData< LatticeStorageSpecification_T >::haveSmallestIdInIntersection(Vector3<cell_idx_t> cornerDir)
+{
+   const IBlockID& myId = block_->getId();
+   const Block* b = dynamic_cast< const Block* >(block_);
+   // forEachSubdirectionCancel aborts (and returns false) when the lambda returns false.
+   return forEachSubdirectionCancel(cornerDir, [&](Vector3< cell_idx_t > dirVec) {
+     const uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(dirVec[0], dirVec[1], dirVec[2]);
+     if (b->neighborhoodSectionHasEquallySizedBlock(nSecIdx))
+     {
+        if (b->getNeighbor(nSecIdx, 0).getId() < myId) return false;
+     }
+     return true;
+   });
+}
+
+
+/**
+ * Sets up the feasible space for the given communication direction.
+ * Additionally to the field interior, marks every ghost layer slice corresponding to an adjacent coarse block,
+ * and the corresponding corner as feasible, if that corner also belongs to a coarse block and the current block
+ * has the smallest BlockID participating in the intersection.
+ * @param commDir A communication direction pointing toward an adjacent coarse block
+ */
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformCommData< LatticeStorageSpecification_T >::setupCornerSkippingOrigins(stencil::Direction commDir)
+{
+#if defined(USE_CELL_INTERVALS)
+   // Interval variant: origins are tracked as a list of cell intervals.
+   cornerSkippingOriginIntervals_.clear();
+#else
+   // Flag variant: clear any origin flags left over from a previous commDir.
+   resetCornerSkippingOriginFlags();
+#endif
+
+   const Block* b = dynamic_cast< const Block* >(block_);
+   Vector3<cell_idx_t> commDirVec(stencil::cx[commDir], stencil::cy[commDir], stencil::cz[commDir]);
+
+   // Iterate all orthogonal comm directions
+   forEachOrthogonalDirection< CommunicationStencil >(commDirVec, [&](Vector3< cell_idx_t > toSourceVec) {
+      const uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(toSourceVec[0], toSourceVec[1], toSourceVec[2]);
+      // Find if there is a coarse block or no block at all in this neighborhood
+      // There are three possibilities: Coarse block, Same-level block or no block
+      // Finer block is not possible because of 2:1 balance
+      if (!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx))
+      {
+         // From this adjacent coarse block (or not-block, for boundary handling), corner skipping must be handled.
+         // Also, if there is no block, boundary handling in that region must be done on only
+         // one of the participating fine blocks.
+         Vector3< cell_idx_t > cornerDirVec = toSourceVec + commDirVec;
+
+         // If the current block has the smallest participating ID...
+         if (haveSmallestIdInIntersection(cornerDirVec))
+         {
+            const stencil::Direction toSourceDir = stencil::vectorToDirection(toSourceVec);
+
+            // ... Mark source GL region as corner skipping origin.
+#if defined(USE_CELL_INTERVALS)
+            CellInterval ci;
+            maskField_.getGhostRegion(toSourceDir, ci, 2);
+            cornerSkippingOriginIntervals_.push_back(ci);
+#else
+            // NOTE(review): beginGhostLayerOnlyXYZ is called here without the
+            // explicit thickness argument, unlike the 2-deep iterations above —
+            // confirm the default covers the intended ghost layers.
+            for (auto it = maskField_.beginGhostLayerOnlyXYZ(toSourceDir); it != maskField_.end(); ++it)
+            {
+               maskField_.addFlag(it.cell(), CORNER_SKIPPING_ORIGIN_FLAG);
+            }
+#endif
+         }
+      }
+   });
+}
+
+
+/**
+ * Sets the bit of one streaming direction on the two-deep ghost slice of the
+ * given communication direction. A ghost cell gets the bit if tracing the
+ * streaming direction back one step lands in the interior, or if it lands in a
+ * pass-through region and one step further back lands in the interior or a
+ * corner-skipping origin region.
+ *
+ * @param commDir   Communication direction whose ghost slice is processed
+ * @param streamDir Streaming direction whose mask bit is set
+ */
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformCommData< LatticeStorageSpecification_T >::setupBitMaskSlice(stencil::Direction commDir, stencil::Direction streamDir)
+{
+   uint_t fIdx = Stencil::idx[streamDir];
+   Cell streamVec(stencil::cx[streamDir], stencil::cy[streamDir], stencil::cz[streamDir]);
+
+#if defined(USE_CELL_INTERVALS)
+   CellInterval commSliceInterval;
+   maskField_.getGhostRegion(commDir, commSliceInterval, 2);
+
+   // Shift back once
+   commSliceInterval.shift(-streamVec);
+
+   // Intersect with interior and set flag on intersection volume
+   CellInterval interiorIntersection(interiorInterval);
+   interiorIntersection.intersect(commSliceInterval);
+   if(!interiorIntersection.empty()){
+      // Shift forward again so the flags land on the ghost cells themselves.
+      interiorIntersection.shift(streamVec);
+      setFlagOnInterval(interiorIntersection, fIdx);
+   }
+
+   // Intersect with pass-through regions...
+   for(auto passThroughIntersection : std::as_const(passThroughIntervals_)){
+      passThroughIntersection.intersect(commSliceInterval);
+      if(passThroughIntersection.empty()) continue;
+
+      // ... shift back once more ...
+      passThroughIntersection.shift(-streamVec);
+
+      // ... intersect with interior ...
+      interiorIntersection = interiorInterval;
+      interiorIntersection.intersect(passThroughIntersection);
+      if(!interiorIntersection.empty()){
+         // Undo both backward shifts to flag the originating ghost cells.
+         interiorIntersection.shift(2*streamVec.x(), 2* streamVec.y(), 2*streamVec.z());
+         setFlagOnInterval(interiorIntersection, fIdx);
+      }
+
+      // ... and with corner-skipping origin regions
+      for(auto originIntersection : std::as_const(cornerSkippingOriginIntervals_)){
+         originIntersection.intersect(passThroughIntersection);
+         if(!originIntersection.empty()){
+            originIntersection.shift(2*streamVec.x(), 2* streamVec.y(), 2*streamVec.z());
+            setFlagOnInterval(originIntersection, fIdx);
+         }
+      }
+   }
+#else
+   // Flag-based variant: walk every ghost cell of the 2-deep slice and trace
+   // the streaming direction backwards cell by cell.
+   for(auto it = maskField_.beginGhostLayerOnlyXYZ(2, commDir); it != maskField_.end(); ++it){
+      Cell currentCell = it.cell();
+
+      // Shift back once
+      Cell shiftedCell = currentCell - streamVec;
+
+      if (maskField_.isFlagSet(shiftedCell, INTERIOR_FLAG)){
+         maskField_.addFlag(currentCell, IDX_FLAG(fIdx));
+      }
+      else if (maskField_.isFlagSet(shiftedCell, PASS_THROUGH_FLAG)){
+         // Shift back twice
+         shiftedCell -= streamVec;
+         if (maskField_.isPartOfMaskSet(shiftedCell, INTERIOR_FLAG | CORNER_SKIPPING_ORIGIN_FLAG)){
+            maskField_.addFlag(currentCell, IDX_FLAG(fIdx));
+         }
+
+      }
+      // else continue;
+   }
+#endif
+}
+
+/**
+ * Computes the partial coalescence bit mask on the mask field.
+ * Assumes that all flags are already registered at the field, and that the field
+ * has been initialized to zero.
+ */
+template< typename LatticeStorageSpecification_T >
+void NonuniformCommData< LatticeStorageSpecification_T >::computeBitMask()
+{
+#if defined(USE_CELL_INTERVALS)
+   prepareIntervals();
+#else
+   prepareFlags();
+#endif
+
+   const Block* b = dynamic_cast< const Block* >(block_);
+   // Only communication directions toward a COARSER neighbor participate in
+   // partial coalescence; all other directions are skipped.
+   for(auto commIt = CommunicationStencil::beginNoCenter(); commIt != CommunicationStencil::end(); ++commIt){
+      stencil::Direction commDir = *commIt;
+      const uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(commDir);
+      if(b->neighborhoodSectionHasLargerBlock(nSecIdx)){
+         // Mark the regions from which corner-skipping populations originate.
+         setupCornerSkippingOrigins(commDir);
+
+         // Set the mask bit of every streaming direction contained in commDir.
+         for(uint_t streamDirIdx = 0; streamDirIdx < Stencil::d_per_d_length[commDir]; streamDirIdx++){
+            stencil::Direction streamDir = Stencil::d_per_d[commDir][streamDirIdx];
+            setupBitMaskSlice(commDir, streamDir);
+         }
+      }
+   }
+}
+
+} // walberla::lbm_generated
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..1b3e43a51dd7e7e8965e2152c58e493f73d8af84
--- /dev/null
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
@@ -0,0 +1,317 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformGeneratedPdfPackInfo.h
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/communication/NonUniformPackInfo.h"
+
+#include "core/DataTypes.h"
+#include "core/mpi/RecvBuffer.h"
+#include "core/mpi/SendBuffer.h"
+
+#include "lbm_generated/communication/NonuniformCommData.h"
+#include "lbm_generated/field/PdfField.h"
+
+namespace walberla::lbm_generated {
+using stencil::Direction;
+
+namespace internal
+{
+/*
+ * Base Template for Packing Kernels Wrapper. This wrapper is required for passing the time step to
+ * kernels generated for in-place streaming patterns. The generated code should not be templated.
+ */
+template< typename PdfField_T, bool inplace >
+class NonuniformPackingKernelsWrapper
+{
+ public:
+
+   // NOTE(review): these members carry a pure-specifier (`= 0`) without being
+   // virtual, which is ill-formed if this primary template is ever
+   // instantiated. Only the two specializations below (inplace = false/true)
+   // are meant to be used; the primary template merely documents the interface.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer) const  = 0;
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer) const = 0;
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                     CellInterval& dstInterval) const                                    = 0;
+
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const  = 0;
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const = 0;
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, Direction dir) const                                    = 0;
+
+   // Coarse-to-fine: unpack one coarse cell into its eight fine target cells.
+   void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
+                           stencil::Direction dir) const = 0;
+
+   // Fine-to-coarse: pack only populations selected by the coalescence mask.
+   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci,
+                               unsigned char* outBuffer, Direction dir) const = 0;
+   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval& ci, Direction dir) const = 0;
+   void unpackCoalescence(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const = 0;
+
+   // Buffer size queries (in bytes) for the corresponding pack operations.
+   uint_t size(CellInterval& ci, Direction dir) const                   = 0;
+   uint_t size(CellInterval& ci) const                                  = 0;
+   uint_t redistributeSize(CellInterval& ci) const                      = 0;
+   uint_t partialCoalescenceSize(CellInterval& ci, Direction dir) const = 0;
+};
+
+/*
+ * Template Specialization for two-fields patterns, with trivial method wrappers.
+ */
+template< typename PdfField_T >
+class NonuniformPackingKernelsWrapper< PdfField_T, false >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+
+   // Two-fields (non-in-place) streaming: no timestep bookkeeping is needed,
+   // so every call forwards directly to the generated kernels.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer) const
+   {
+      kernels_.packAll(srcField, ci, outBuffer);
+   }
+
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer) const
+   {
+      kernels_.unpackAll(dstField, ci, inBuffer);
+   }
+
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                     CellInterval& dstInterval) const
+   {
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval);
+   }
+
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const
+   {
+      kernels_.packDirection(srcField, ci, outBuffer, dir);
+   }
+
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const
+   {
+      kernels_.unpackDirection(dstField, ci, inBuffer, dir);
+   }
+
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, Direction dir) const
+   {
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir);
+   }
+
+   void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
+                           stencil::Direction dir) const
+   {
+      kernels_.unpackRedistribute(dstField, ci, inBuffer, dir);
+   }
+
+   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci,
+                               unsigned char* outBuffer, Direction dir) const
+   {
+      kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir);
+   }
+
+   void unpackCoalescence(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const
+   {
+      kernels_.unpackCoalescence(dstField, ci, inBuffer, dir);
+   }
+
+   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval& ci, Direction dir) const
+   {
+      kernels_.zeroCoalescenceRegion(dstField, ci, dir);
+   }
+
+   // Buffer size queries forwarded to the generated kernels.
+   uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); }
+   uint_t size(CellInterval& ci) const { return kernels_.size(ci); }
+   uint_t redistributeSize(CellInterval& ci) const { return kernels_.redistributeSize(ci); }
+   uint_t partialCoalescenceSize(CellInterval& ci, Direction dir) const
+   {
+      return kernels_.partialCoalescenceSize(ci, dir);
+   }
+
+ private:
+   PackingKernels_T kernels_;  // stateless generated packing kernels
+};
+
+/*
+ * Template Specialization for in-place patterns, extracting the timestep from the lattice model.
+ */
+template< typename PdfField_T >
+class NonuniformPackingKernelsWrapper< PdfField_T, true >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+
+   // In-place streaming: the generated kernels need the field's current
+   // timestep (even/odd) to address the PDFs, so each wrapper reads it from
+   // the field and forwards it as an extra argument.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packAll(srcField, ci, outBuffer, timestep);
+   }
+
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackAll(dstField, ci, inBuffer, timestep);
+   }
+
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                     CellInterval& dstInterval) const
+   {
+      // Local copies require both fields to be in the same timestep.
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(timestep, dstField->getTimestep())
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval, timestep);
+   }
+
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packDirection(srcField, ci, outBuffer, dir, timestep);
+   }
+
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackDirection(dstField, ci, inBuffer, dir, timestep);
+   }
+
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, Direction dir) const
+   {
+      // Local copies require both fields to be in the same timestep.
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(timestep, dstField->getTimestep())
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep);
+   }
+
+   void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
+                           stencil::Direction dir) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep);
+   }
+
+   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval& ci,
+                               unsigned char* outBuffer, Direction dir) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep);
+   }
+
+   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval& ci, Direction dir) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.zeroCoalescenceRegion(dstField, ci, dir, timestep);
+   }
+
+   void unpackCoalescence(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackCoalescence(dstField, ci, inBuffer, dir, timestep);
+   }
+
+   // Buffer size queries are timestep-independent and forward directly.
+   uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); }
+   uint_t size(CellInterval& ci) const { return kernels_.size(ci); }
+   uint_t redistributeSize(CellInterval& ci) const { return kernels_.redistributeSize(ci); }
+   uint_t partialCoalescenceSize(CellInterval& ci, Direction dir) const
+   {
+      return kernels_.partialCoalescenceSize(ci, dir);
+   }
+
+ private:
+   PackingKernels_T kernels_;  // stateless generated packing kernels
+};
+} // namespace internal
+
+/***********************************************************************************************************************
+ *                                                  Class Declaration                                                  *
+ **********************************************************************************************************************/
+
+/**
+ * Non-uniform (grid-refinement-aware) pack info for code-generated LBM kernels.
+ * Implements equal-level, coarse-to-fine and fine-to-coarse PDF communication
+ * on a block forest, delegating the actual (un)packing to the generated
+ * packing kernels via NonuniformPackingKernelsWrapper.
+ */
+template< typename PdfField_T >
+class NonuniformGeneratedPdfPackInfo : public blockforest::communication::NonUniformPackInfo
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+   using Stencil      = typename LatticeStorageSpecification_T::Stencil;
+   using CommunicationStencil = typename LatticeStorageSpecification_T::CommunicationStencil;
+   using CommData_T           = NonuniformCommData< LatticeStorageSpecification_T >;
+
+
+   /// @param pdfFieldID Block data ID of the PDF field to communicate
+   /// @param commDataID Block data ID of the NonuniformCommData (see setupNonuniformPdfCommunication)
+   NonuniformGeneratedPdfPackInfo(const BlockDataID pdfFieldID, const BlockDataID commDataID)
+      : pdfFieldID_(pdfFieldID), commDataID_(commDataID){};
+
+   bool constantDataExchange() const override { return true; };
+   bool threadsafeReceiving() const override { return false; };
+
+   /// Equal Level
+   void unpackDataEqualLevel(Block* receiver, Direction dir, mpi::RecvBuffer& buffer) override;
+   void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir) override;
+
+   /// Coarse to Fine
+   void unpackDataCoarseToFine(Block* fineReceiver, const BlockID& coarseSender, stencil::Direction dir,
+                               mpi::RecvBuffer& buffer) override;
+   void communicateLocalCoarseToFine(const Block* coarseSender, Block* fineReceiver, stencil::Direction dir) override;
+
+   /// Fine to Coarse
+   void prepareCoalescence(Block* coarseReceiver);
+   void unpackDataFineToCoarse(Block* coarseReceiver, const BlockID& fineSender, stencil::Direction dir,
+                               mpi::RecvBuffer& buffer) override;
+
+   void communicateLocalFineToCoarse(const Block* fineSender, Block* coarseReceiver, stencil::Direction dir) override;
+
+ protected:
+   void packDataEqualLevelImpl(const Block* sender, stencil::Direction dir, mpi::SendBuffer& buffer) const override;
+
+   void packDataCoarseToFineImpl(const Block* coarseSender, const BlockID& fineReceiver, stencil::Direction dir,
+                                 mpi::SendBuffer& buffer) const override;
+   void packDataFineToCoarseImpl(const Block* fineSender, const BlockID& coarseReceiver, stencil::Direction dir,
+                                 mpi::SendBuffer& buffer) const override;
+
+ private:
+   /// Helper Functions
+   /// As in PdfFieldPackInfo.h
+   Vector3< cell_idx_t > getNeighborShift(const BlockID& fineBlock, stencil::Direction dir) const;
+   bool areNeighborsInDirection(const Block * block, const BlockID & neighborID, const Vector3< cell_idx_t> dirVec) const;
+
+   CellInterval intervalHullInDirection(const CellInterval& ci, const Vector3< cell_idx_t > tangentialDir,
+                                        cell_idx_t width) const;
+   bool skipsThroughCoarseBlock(const Block* block, const Direction dir) const;
+
+   void getCoarseBlockCommIntervals(const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+                                    std::vector< std::pair< Direction, CellInterval > >& intervals) const;
+   void getFineBlockCommIntervals(const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+                                  std::vector< std::pair< Direction, CellInterval > >& intervals) const;
+
+   CellInterval getCoarseBlockCoalescenceInterval(const Block * coarseBlock, const BlockID & fineBlockID,
+                                                  Direction dir, const PdfField_T * field) const;
+
+   const BlockDataID pdfFieldID_;
+   // Wrapper selecting between in-place and two-fields kernel signatures at compile time.
+   internal::NonuniformPackingKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_;
+
+ public:
+   const BlockDataID commDataID_;
+};
+
+/***********************************************************************************************************************
+ *                                                  Factory Functions                                                  *
+ **********************************************************************************************************************/
+
+/// Registers the required NonuniformCommData at the block forest and returns a
+/// ready-to-use NonuniformGeneratedPdfPackInfo (definition in the .impl.h file).
+template< typename PdfField_T>
+std::shared_ptr< NonuniformGeneratedPdfPackInfo< PdfField_T > >
+   setupNonuniformPdfCommunication(const std::weak_ptr< StructuredBlockForest >& blocks, const BlockDataID pdfFieldID,
+                                   const std::string& dataIdentifier = "NonuniformCommData");
+
+} // walberla::lbm_generated
+
+#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h"
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..cf36a61f9813989b5e975e6782f5c3ea138a3e96
--- /dev/null
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
@@ -0,0 +1,490 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformGeneratedPdfPackInfo.impl.h
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "NonuniformGeneratedPdfPackInfo.h"
+
+using namespace walberla::lbm_generated::util;
+
+namespace walberla::lbm_generated {
+
+/***********************************************************************************************************************
+ *                                                  Factory Functions                                                  *
+ **********************************************************************************************************************/
+
+
+/**
+ * Sets up a NonuniformGeneratedPdfPackInfo.
+ *
+ * Registers a NonuniformCommData block datum at the block forest under the
+ * given data identifier, then constructs a pack info referencing the PDF
+ * field and the newly created communication data.
+ *
+ * @tparam PdfField_T      type of the PDF field the pack info operates on
+ * @param blocks           the block forest holding the PDF field; must still be alive
+ * @param pdfFieldID       block data ID of the PDF field
+ * @param dataIdentifier   identifier under which the communication data is registered
+ * @return shared pointer to the ready-to-use pack info
+ */
+template< typename PdfField_T>
+std::shared_ptr< NonuniformGeneratedPdfPackInfo< PdfField_T > >
+setupNonuniformPdfCommunication( const std::weak_ptr< StructuredBlockForest > & blocks,
+                                 const BlockDataID pdfFieldID,
+                                 const std::string & dataIdentifier)
+{
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+
+   // The weak pointer must still reference a live block forest.
+   auto sbf = blocks.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(sbf)
+
+   auto handling = std::make_shared<NonuniformCommDataHandling< LatticeStorageSpecification_T > >(blocks);
+   BlockDataID commDataID = sbf->addBlockData(handling, dataIdentifier);
+
+   return std::make_shared<NonuniformGeneratedPdfPackInfo< PdfField_T > >(pdfFieldID, commDataID);
+}
+
+
+/***********************************************************************************************************************
+ *                                          Equal Level Communication                                                  *
+ **********************************************************************************************************************/
+
+/**
+ * Unpacks an equal-level message into the receiver's ghost region.
+ * Two ghost layers are used when the direction skips through a coarse block,
+ * one otherwise (mirrors packDataEqualLevelImpl on the sender side).
+ */
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::unpackDataEqualLevel(Block* receiver,
+                                                                                              Direction dir,
+                                                                                              mpi::RecvBuffer& buffer)
+{
+   auto field = receiver->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   cell_idx_t gls = skipsThroughCoarseBlock(receiver, dir) ? 2 : 1;
+   field->getGhostRegion(dir, ci, gls, false);
+   uint_t size              = kernels_.size(ci, dir);
+   unsigned char* bufferPtr = buffer.skip(size);
+   kernels_.unpackDirection(field, ci, bufferPtr, dir);
+}
+
+/**
+ * Process-local equal-level communication: copies the sender's slice before
+ * the ghost layer directly into the receiver's ghost region, bypassing MPI
+ * buffers. Ghost-layer depth matches the buffered path (2 when the direction
+ * skips through a coarse block, else 1).
+ */
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(
+   const Block* sender, Block* receiver, stencil::Direction dir)
+{
+   // const_cast needed because getData has no const overload returning a mutable field.
+   auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+   CellInterval srcRegion;
+   CellInterval dstRegion;
+   cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   srcField->getSliceBeforeGhostLayer(dir, srcRegion, gls, false);
+   dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, gls, false);
+   kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir);
+}
+
+/**
+ * Packs the sender's slice before the ghost layer for an equal-level message.
+ * Counterpart of unpackDataEqualLevel; uses the same ghost-layer depth rule.
+ */
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::packDataEqualLevelImpl(
+   const Block* sender, stencil::Direction dir, mpi::SendBuffer& buffer) const
+{
+   auto field = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   field->getSliceBeforeGhostLayer(dir, ci, gls, false);
+   // Reserve exactly the bytes the kernel will write, then pack in place.
+   unsigned char* bufferPtr = buffer.forward(kernels_.size(ci, dir));
+   kernels_.packDirection(field, ci, bufferPtr, dir);
+}
+
+/***********************************************************************************************************************
+ *                                          Coarse to Fine Communication                                               *
+ **********************************************************************************************************************/
+
+/**
+ * Packs all PDFs of every coarse-block interval that must be sent to the given
+ * fine receiver. Interval order is defined by getCoarseBlockCommIntervals and
+ * must match the unpacking order in unpackDataCoarseToFine.
+ */
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::packDataCoarseToFineImpl(
+   const Block* coarseSender, const BlockID& fineReceiver, stencil::Direction dir, mpi::SendBuffer& buffer) const
+{
+   auto field = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > intervals;
+   getCoarseBlockCommIntervals(fineReceiver, dir, field, intervals);
+
+   for (auto t : intervals)
+   {
+      CellInterval ci          = t.second;
+      unsigned char* bufferPtr = buffer.forward(kernels_.size(ci));
+      kernels_.packAll(field, ci, bufferPtr);
+   }
+}
+
+/**
+ * Unpacks coarse-to-fine data on the fine receiver. Each buffered coarse
+ * interval is redistributed onto the fine grid (one coarse cell feeding its
+ * fine target cells) via unpackRedistribute. Interval order must match
+ * packDataCoarseToFineImpl.
+ */
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::unpackDataCoarseToFine(
+   Block* fineReceiver, const BlockID& /*coarseSender*/, stencil::Direction dir, mpi::RecvBuffer& buffer)
+{
+   auto field = fineReceiver->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > intervals;
+   getFineBlockCommIntervals(fineReceiver->getId(), dir, field, intervals);
+
+   for (auto t : intervals)
+   {
+      Direction d              = t.first;
+      CellInterval ci          = t.second;
+      uint_t size              = kernels_.redistributeSize(ci);
+      unsigned char* bufferPtr = buffer.skip(size);
+      kernels_.unpackRedistribute(field, ci, bufferPtr, d);
+   }
+}
+
+/**
+ * Process-local coarse-to-fine communication. Pairs up the coarse-side pack
+ * intervals with the fine-side unpack intervals (same index, inverse
+ * directions) and transfers each pair through a temporary buffer using the
+ * same pack/redistribute kernels as the MPI path.
+ */
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
+   const Block* coarseSender, Block* fineReceiver, stencil::Direction dir)
+{
+   auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > srcIntervals;
+   getCoarseBlockCommIntervals(fineReceiver->getId(), dir, srcField, srcIntervals);
+
+   std::vector< std::pair< Direction, CellInterval > > dstIntervals;
+   getFineBlockCommIntervals(fineReceiver->getId(), stencil::inverseDir[dir], dstField, dstIntervals);
+
+   // Pack and unpack interval lists must correspond element-wise.
+   WALBERLA_ASSERT_EQUAL(srcIntervals.size(), dstIntervals.size())
+
+   for(size_t index = 0; index < srcIntervals.size(); index++)
+   {
+      CellInterval srcInterval = srcIntervals[index].second;
+
+      Direction unpackDir      = dstIntervals[index].first;
+      CellInterval dstInterval = dstIntervals[index].second;
+
+      uint_t packSize      = kernels_.size(srcInterval);
+
+#ifndef NDEBUG
+      // Debug-only consistency checks: directions must be inverse and the
+      // packed byte count must equal the redistribute byte count.
+      Direction const packDir        = srcIntervals[index].first;
+      WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
+      uint_t unpackSize = kernels_.redistributeSize(dstInterval);
+      WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+      // TODO: This is a dirty workaround. Code-generate direct redistribution!
+      std::vector< unsigned char > buffer(packSize);
+      kernels_.packAll(srcField, srcInterval, &buffer[0]);
+      kernels_.unpackRedistribute(dstField, dstInterval, &buffer[0], unpackDir);
+   }
+}
+
+/***********************************************************************************************************************
+ *                                          Fine to Coarse Communication                                               *
+ **********************************************************************************************************************/
+
+/// Resets the coalescence regions on a coarse block before fine-to-coarse communication.
+template< typename PdfField_T>
+void NonuniformGeneratedPdfPackInfo< PdfField_T >::prepareCoalescence(Block* coarseReceiver)
+{
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+
+   // For every communication direction that leads to at least one finer neighbor block,
+   // zero the slice before the ghost layer so fine-block contributions can be accumulated.
+   for (auto dirIt = CommunicationStencil::beginNoCenter(); dirIt != CommunicationStencil::end(); ++dirIt)
+   {
+      uint_t const sectionIndex = blockforest::getBlockNeighborhoodSectionIndex(*dirIt);
+      if (!coarseReceiver->neighborhoodSectionHasSmallerBlocks(sectionIndex)) continue;
+
+      CellInterval interval;
+      dstField->getSliceBeforeGhostLayer(*dirIt, interval, 1);
+      kernels_.zeroCoalescenceRegion(dstField, interval, *dirIt);
+   }
+}
+
+/// Unpacks coalesced data from the fine block `fineSender` into the coarse block
+/// `coarseReceiver` for communication direction `dir`.
+template< typename PdfField_T>
+void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::unpackDataFineToCoarse(
+   walberla::Block* coarseReceiver, const walberla::BlockID& fineSender, walberla::stencil::Direction dir,
+   walberla::mpi::RecvBuffer& buffer)
+{
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+
+   // Target interval on the coarse block that corresponds to this fine sender.
+   CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender, dir, dstField);
+   uint_t const numBytes    = kernels_.size(dstInterval, dir);
+   unsigned char* dataPtr   = buffer.skip(numBytes);
+   kernels_.unpackCoalescence(dstField, dstInterval, dataPtr, dir);
+}
+
+/// Copies data from a fine block to a local coarse neighbor without MPI buffers.
+/// Packs the partial-coalescence ghost region on the fine block into a temporary
+/// buffer and immediately unpacks it into the coalescence interval on the coarse block.
+template< typename PdfField_T>
+void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalFineToCoarse(
+   const walberla::Block* fineSender, walberla::Block* coarseReceiver, walberla::stencil::Direction dir)
+{
+   Block * varFineSender = const_cast< Block * >(fineSender);
+   auto srcField   = varFineSender->getData< PdfField_T >(pdfFieldID_);
+   auto srcCommData   = varFineSender->getData< CommData_T >(commDataID_);
+   PartialCoalescenceMaskField * maskField = &(srcCommData->getMaskField());
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+   Direction invDir = stencil::inverseDir[dir];
+
+   // Source region: the two ghost layers of the fine block in direction dir.
+   CellInterval srcInterval;
+   srcField->getGhostRegion(dir, srcInterval, 2);
+   uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
+
+   CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
+                                                                invDir, dstField);
+
+#ifndef NDEBUG
+   uint_t unpackSize = kernels_.size(dstInterval, invDir);
+   WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+   // TODO: This is a dirty workaround. Code-generate direct redistribution!
+   // Use data() instead of &buffer[0]: element access on an empty vector is UB,
+   // while data() is valid even when packSize is zero.
+   std::vector< unsigned char > buffer(packSize);
+   kernels_.packPartialCoalescence(srcField, maskField, srcInterval, buffer.data(), dir);
+   kernels_.unpackCoalescence(dstField, dstInterval, buffer.data(), invDir);
+}
+
+/// Packs the partial-coalescence ghost region of the fine block `fineSender` in
+/// direction `dir` into the send buffer, masked by the block's coalescence mask field.
+template< typename PdfField_T>
+void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::packDataFineToCoarseImpl(
+   const walberla::Block* fineSender, const walberla::BlockID& /*coarseReceiver*/, walberla::stencil::Direction dir,
+   walberla::mpi::SendBuffer& buffer) const
+{
+   // Block data access requires a mutable block pointer.
+   auto senderBlock = const_cast< Block* >(fineSender);
+   auto srcField    = senderBlock->getData< PdfField_T >(pdfFieldID_);
+   auto commData    = senderBlock->getData< CommData_T >(commDataID_);
+   PartialCoalescenceMaskField* maskField = &(commData->getMaskField());
+
+   // Pack the two ghost layers in direction dir.
+   CellInterval packInterval;
+   srcField->getGhostRegion(dir, packInterval, 2);
+   uint_t const numBytes  = kernels_.partialCoalescenceSize(packInterval, dir);
+   unsigned char* dataPtr = buffer.forward(numBytes);
+   kernels_.packPartialCoalescence(srcField, maskField, packInterval, dataPtr, dir);
+}
+
+/***********************************************************************************************************************
+ *                                                  Helper Functions                                                   *
+ **********************************************************************************************************************/
+
+template< typename PdfField_T>
+inline Vector3< cell_idx_t >
+NonuniformGeneratedPdfPackInfo< PdfField_T >::getNeighborShift(const BlockID& fineBlock,
+                                                                                     stencil::Direction dir) const
+{
+   // dir: direction from coarse to fine block, or vice versa
+   // For each axis on which dir has no component, the fine block's branch id selects
+   // whether it occupies the lower (-1) or upper (+1) half; otherwise the shift is 0.
+   Vector3< cell_idx_t > shift(cell_idx_t(0));
+
+   uint_t const branchId = fineBlock.getBranchId();
+
+   if (stencil::cx[dir] == 0)
+      shift[0] = ((branchId & uint_t(1)) == uint_t(0)) ? cell_idx_t(-1) : cell_idx_t(1);
+
+   if (stencil::cy[dir] == 0)
+      shift[1] = ((branchId & uint_t(2)) == uint_t(0)) ? cell_idx_t(-1) : cell_idx_t(1);
+
+   // The z component is only meaningful for three-dimensional stencils.
+   if (Stencil::D == uint_t(3) && stencil::cz[dir] == 0)
+      shift[2] = ((branchId & uint_t(4)) == uint_t(0)) ? cell_idx_t(-1) : cell_idx_t(1);
+
+   return shift;
+}
+
+/**
+ * Returns the part of a cell interval's hull of given width in direction dirVec.
+ * @param ci        The original cell interval
+ * @param dirVec    Direction Vector
+ * @param width     Width of the hull
+ * @return          Interval forming the part of the hull
+ */
+template< typename PdfField_T>
+inline CellInterval NonuniformGeneratedPdfPackInfo< PdfField_T >::intervalHullInDirection(
+   const CellInterval& ci, const Vector3< cell_idx_t > dirVec, cell_idx_t width) const
+{
+   CellInterval hull(ci);
+   for (uint_t axis = 0; axis < Stencil::D; axis++)
+   {
+      if (dirVec[axis] == 1)
+      {
+         // Slab of `width` cells directly above the interval's upper face on this axis.
+         hull.min()[axis] = hull.max()[axis] + cell_idx_t(1);
+         hull.max()[axis] += width;
+      }
+      else if (dirVec[axis] == -1)
+      {
+         // Slab of `width` cells directly below the interval's lower face on this axis.
+         hull.max()[axis] = hull.min()[axis] - cell_idx_t(1);
+         hull.min()[axis] -= width;
+      }
+   }
+
+   return hull;
+}
+
+/**
+ * For edge or corner directions, checks if a coarser block is part of the respective edge or corner intersection.
+ * @param block The local block
+ * @param dir   The direction to check
+ * @return      `true`  if dir is an edge or corner direction skipping through a coarser block.
+ */
+template< typename PdfField_T>
+inline bool NonuniformGeneratedPdfPackInfo< PdfField_T >::skipsThroughCoarseBlock(
+   const Block* block, const Direction dir) const
+{
+   Vector3< cell_idx_t > dirVec(stencil::cx[dir], stencil::cy[dir], stencil::cz[dir]);
+   bool foundCoarseNeighbor = false;
+   // Scan all sub-directions of dir and cancel the traversal as soon as a larger
+   // (i.e. coarser) neighbor block is found in one of them.
+   forEachSubdirectionCancel(dirVec, [&](Vector3< cell_idx_t > subdir) {
+     uint_t const sectionIndex =
+        blockforest::getBlockNeighborhoodSectionIndex(subdir[0], subdir[1], subdir[2]);
+     if (block->neighborhoodSectionHasLargerBlock(sectionIndex)) { foundCoarseNeighbor = true; }
+     return !foundCoarseNeighbor;
+   });
+
+   return foundCoarseNeighbor;
+}
+
+/**
+ * For coarse-to-fine and fine-to-coarse communication, returns a list of pairs (Direction, CellInterval)
+ * mapping sub-directions of the communication direction to cell intervals on the coarse block interior
+ * whose data must be communicated <i>as if</i> communicating in those sub-directions.
+ * @param fineBlockID   ID of the fine block
+ * @param dir           Direction from the coarse to the fine block
+ * @param field         Pointer to the PDF field on the coarse block
+ * @param intervals     Vector that will be filled with the computed intervals
+ */
+template< typename PdfField_T>
+inline void NonuniformGeneratedPdfPackInfo< PdfField_T >::getCoarseBlockCommIntervals(
+   const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+   std::vector< std::pair< Direction, CellInterval > >& intervals) const
+{
+   // Per-axis indicator of which half of the coarse slice faces this fine block
+   // (derived from the fine block's branch id).
+   Vector3< cell_idx_t > shift = getNeighborShift(fineBlockID, dir);
+
+   // Main interval: one interior cell layer directly before the ghost layer in dir.
+   CellInterval mainSlice;
+   field->getSliceBeforeGhostLayer(dir, mainSlice, 1, false);
+
+   // In all directions, restrict the slice to the lower or upper half, depending on neighbor shift
+   for (uint_t i = 0; i != Stencil::D; ++i)
+   {
+      if (shift[i] == cell_idx_t(-1))
+      {
+         // The slice extent must be even so it can be halved exactly.
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.max()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2)) - cell_idx_t(1);
+      }
+      if (shift[i] == cell_idx_t(1))
+      {
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.min()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2));
+      }
+   }
+
+   intervals.emplace_back(dir, mainSlice);
+
+   Vector3< cell_idx_t > const commDirVec{ stencil::cx[dir], stencil::cy[dir], stencil::cz[dir] };
+
+   // Get extended slices in all tangential directions for the diagonal part of communication
+   forEachSubdirection(-shift, [&](Vector3< cell_idx_t > t) {
+     CellInterval hullInterval = intervalHullInDirection(mainSlice, t, cell_idx_t(1));
+     // Sub-communication direction: main direction minus the tangential offset.
+     Direction subCommDir      = stencil::vectorToDirection(commDirVec - t);
+     if(CommunicationStencil::containsDir(subCommDir)){
+        intervals.emplace_back(subCommDir, hullInterval);
+     }
+   });
+}
+
+/**
+ * For coarse-to-fine and fine-to-coarse communication, returns a list of pairs (Direction, CellInterval)
+ * mapping sub-directions of the communication direction to cell intervals on the fine block whose data must
+ * be communicated <i>as if</i> communicating in those sub-directions.
+ * @param fineBlockID   ID of the fine block
+ * @param dir           Direction from the fine to the coarse block
+ * @param field         Pointer to the PDF Field on the fine block
+ * @param intervals     Vector that will be filled with the computed intervals
+ */
+template< typename PdfField_T>
+inline void NonuniformGeneratedPdfPackInfo< PdfField_T >::getFineBlockCommIntervals(
+   const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+   std::vector< std::pair< Direction, CellInterval > >& intervals) const
+{
+   // Per-axis indicator of this fine block's position relative to dir
+   // (derived from the fine block's branch id).
+   Vector3< cell_idx_t > shift = getNeighborShift(fineBlockID, dir);
+
+   // Main interval: the two ghost layers of the fine block in direction dir.
+   CellInterval mainSlice;
+   field->getGhostRegion(dir, mainSlice, 2, false);
+   intervals.emplace_back(dir, mainSlice);
+
+   Vector3< cell_idx_t > const commDirVec{ stencil::cx[dir], stencil::cy[dir], stencil::cz[dir] };
+
+   // Extended hull slices (width 2) in all tangential directions for the diagonal
+   // part of communication.
+   forEachSubdirection(-shift, [&](Vector3< cell_idx_t > t) {
+     CellInterval hullInterval = intervalHullInDirection(mainSlice, t, cell_idx_t(2));
+     // Sub-communication direction: main direction plus the tangential offset.
+     Direction subCommDir      = stencil::vectorToDirection(commDirVec + t);
+     if(CommunicationStencil::containsDir(subCommDir)){
+        intervals.emplace_back(subCommDir, hullInterval);
+     }
+   });
+}
+/**
+ * Checks whether or not the block with ID `neighborID` is a neighbor of `block` in direction `dir`.
+ */
+template< typename PdfField_T>
+bool NonuniformGeneratedPdfPackInfo< PdfField_T >::areNeighborsInDirection(
+   const Block* block, const BlockID& neighborID, const Vector3< cell_idx_t> dirVec) const
+{
+   uint_t const sectionIndex = blockforest::getBlockNeighborhoodSectionIndex(dirVec[0], dirVec[1], dirVec[2]);
+   uint_t const numNeighbors = block->getNeighborhoodSectionSize(sectionIndex);
+
+   // Linear scan over all neighbors registered in that neighborhood section.
+   for (uint_t n = 0; n < numNeighbors; n++)
+   {
+      if (block->getNeighborId(sectionIndex, n) == neighborID) { return true; }
+   }
+   return false;
+}
+
+/**
+ * Computes the interval on the coarse block into which data from the fine block with ID
+ * `fineBlockID` is coalesced when communicating in direction `dir`. Also handles the
+ * asymmetric case where, due to periodicity, the fine block is not a direct neighbor in
+ * `dir` itself but only in one of its sub-directions.
+ * @param coarseBlock  The local coarse block
+ * @param fineBlockID  ID of the fine block sending data
+ * @param dir          Communication direction on the coarse block
+ * @param field        Pointer to the PDF field on the coarse block
+ * @return             The coalescence interval on the coarse block
+ */
+template< typename PdfField_T>
+CellInterval NonuniformGeneratedPdfPackInfo< PdfField_T >::getCoarseBlockCoalescenceInterval(
+   const Block* coarseBlock, const BlockID& fineBlockID, Direction dir, const PdfField_T* field) const
+{
+   Direction mainDir(dir);
+   Vector3< cell_idx_t > commDirVec(stencil::cx[dir], stencil::cy[dir], stencil::cz[dir]);
+   Vector3< cell_idx_t > mainDirVec(commDirVec);
+   // Asymmetric: the fine block is not a direct neighbor in dir itself.
+   bool isAsymmetric = !areNeighborsInDirection(coarseBlock, fineBlockID, commDirVec);
+
+   // If asymmetric, find the main subdirection
+   if(isAsymmetric){
+      mainDirVec = Vector3< cell_idx_t >(0);
+      forEachSubdirection(commDirVec, [&](Vector3< cell_idx_t > subdirVec){
+         if(areNeighborsInDirection(coarseBlock, fineBlockID, subdirVec)){
+            // -dir is one main communication direction from F to C, but, due to periodicity,
+            // it might not be the only one. Find the main comm direction from the subdirections
+            // that is largest in the 1-norm.
+            if(subdirVec.sqrLength() > mainDirVec.sqrLength()) mainDirVec = subdirVec;
+         }
+      });
+      mainDir = stencil::vectorToDirection(mainDirVec);
+   }
+
+   // Which half (per axis) of the coarse slice corresponds to this fine block.
+   Vector3< cell_idx_t > shift = getNeighborShift(fineBlockID, mainDir);
+
+   CellInterval mainSlice;
+   field->getSliceBeforeGhostLayer(mainDir, mainSlice, 1, false);
+
+   // In all directions, restrict the slice to the lower or upper half, depending on neighbor shift
+   for (uint_t i = 0; i != Stencil::D; ++i)
+   {
+      if (shift[i] == cell_idx_t(-1))
+      {
+         // The slice extent must be even so it can be halved exactly.
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.max()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2)) - cell_idx_t(1);
+      }
+      if (shift[i] == cell_idx_t(1))
+      {
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.min()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2));
+      }
+   }
+
+   CellInterval commSlice(mainSlice);
+
+   // If asymmetric, find coalescence slice as hull of main slice
+   if(isAsymmetric){
+      commSlice = intervalHullInDirection(mainSlice, mainDirVec - commDirVec, 1);
+   }
+
+   return commSlice;
+}
+
+} // walberla::lbm_generated
diff --git a/src/lbm_generated/communication/UniformGeneratedPdfPackInfo.h b/src/lbm_generated/communication/UniformGeneratedPdfPackInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..76d28617a2b7f7be888eb1ed84ecb945a23bc229
--- /dev/null
+++ b/src/lbm_generated/communication/UniformGeneratedPdfPackInfo.h
@@ -0,0 +1,291 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file UniformGeneratedPdfPackInfo.h
+//! \ingroup lbm
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \brief Class Template for Lattice Boltzmann PDF Pack Infos using code-generated kernels
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "communication/UniformPackInfo.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "lbm/field/PdfField.h"
+
+#include "stencil/Directions.h"
+
+namespace walberla
+{
+using communication::UniformPackInfo;
+
+namespace lbm_generated
+{
+using stencil::Direction;
+
+namespace internal
+{
+/*
+ * Base Template for Packing Kernels Wrapper. This wrapper is required for passing the time step to
+ * kernels generated for in-place streaming patterns. The generated code should not be templated.
+ *
+ * NOTE(review): the `= 0` pure-specifiers below appear on non-virtual member functions;
+ * this primary template only documents the expected interface and is presumably never
+ * instantiated — only the `inplace == false/true` specializations below are usable.
+ * Confirm no code path instantiates the primary template.
+ */
+template< typename PdfField_T, bool inplace >
+class UniformPackingKernelsWrapper
+{
+ public:
+
+   // Pack/unpack all populations of an interval, and copy all populations locally.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer) const  = 0;
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer) const = 0;
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                     CellInterval& dstInterval) const                                    = 0;
+
+   // Direction-restricted variants operating only on populations relevant for `dir`.
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, const Direction dir) const  = 0;
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, const Direction dir) const = 0;
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, const Direction dir) const                                    = 0;
+
+   // Required buffer sizes for the direction-restricted and the full pack operations.
+   uint_t size(CellInterval& ci, const Direction dir) const = 0;
+   uint_t size(CellInterval& ci) const                = 0;
+};
+
+/*
+ * Specialization for two-field streaming patterns: every call forwards directly to the
+ * generated kernels, no time step handling is required.
+ */
+template< typename PdfField_T >
+class UniformPackingKernelsWrapper< PdfField_T, false >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+
+   // Pack all populations of the interval into the buffer.
+   void packAll(PdfField_T* srcField, CellInterval& interval, unsigned char* buf) const
+   {
+      kernels_.packAll(srcField, interval, buf);
+   }
+
+   // Unpack all populations of the interval from the buffer.
+   void unpackAll(PdfField_T* dstField, CellInterval& interval, unsigned char* buf) const
+   {
+      kernels_.unpackAll(dstField, interval, buf);
+   }
+
+   // Copy all populations between two intervals of two fields.
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                     CellInterval& dstInterval) const
+   {
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval);
+   }
+
+   // Pack only the populations relevant for communication in direction dir.
+   void packDirection(PdfField_T* srcField, CellInterval& interval, unsigned char* buf, const Direction dir) const
+   {
+      kernels_.packDirection(srcField, interval, buf, dir);
+   }
+
+   // Unpack only the populations relevant for communication in direction dir.
+   void unpackDirection(PdfField_T* dstField, CellInterval& interval, unsigned char* buf, const Direction dir) const
+   {
+      kernels_.unpackDirection(dstField, interval, buf, dir);
+   }
+
+   // Copy the direction-relevant populations directly between two fields.
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, const Direction dir) const
+   {
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir);
+   }
+
+   // Buffer sizes in bytes, as reported by the generated kernels.
+   uint_t size(CellInterval& interval, const Direction dir) const { return kernels_.size(interval, dir); }
+   uint_t size(CellInterval& interval) const { return kernels_.size(interval); }
+
+ private:
+   PackingKernels_T kernels_;
+};
+
+/*
+ * Specialization for in-place streaming patterns: the current time step is read from
+ * the PDF field and forwarded to every generated kernel.
+ */
+template< typename PdfField_T >
+class UniformPackingKernelsWrapper< PdfField_T, true >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+
+   // Pack all populations of the interval at the source field's current time step.
+   void packAll(PdfField_T* srcField, CellInterval& interval, unsigned char* buf) const
+   {
+      uint8_t const ts = srcField->getTimestep();
+      kernels_.packAll(srcField, interval, buf, ts);
+   }
+
+   // Unpack all populations of the interval at the destination field's current time step.
+   void unpackAll(PdfField_T* dstField, CellInterval& interval, unsigned char* buf) const
+   {
+      uint8_t const ts = dstField->getTimestep();
+      kernels_.unpackAll(dstField, interval, buf, ts);
+   }
+
+   // Copy all populations between two fields; both must be at the same time step.
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                     CellInterval& dstInterval) const
+   {
+      uint8_t const ts = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(ts, dstField->getTimestep())
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval, ts);
+   }
+
+   // Pack only the populations relevant for communication in direction dir.
+   void packDirection(PdfField_T* srcField, CellInterval& interval, unsigned char* buf, const Direction dir) const
+   {
+      uint8_t const ts = srcField->getTimestep();
+      kernels_.packDirection(srcField, interval, buf, dir, ts);
+   }
+
+   // Unpack only the populations relevant for communication in direction dir.
+   void unpackDirection(PdfField_T* dstField, CellInterval& interval, unsigned char* buf, const Direction dir) const
+   {
+      uint8_t const ts = dstField->getTimestep();
+      kernels_.unpackDirection(dstField, interval, buf, dir, ts);
+   }
+
+   // Copy the direction-relevant populations directly; both fields must agree on the time step.
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, const Direction dir) const
+   {
+      uint8_t const ts = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(ts, dstField->getTimestep())
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, ts);
+   }
+
+   // Buffer sizes in bytes, as reported by the generated kernels.
+   uint_t size(CellInterval& interval, const Direction dir) const { return kernels_.size(interval, dir); }
+   uint_t size(CellInterval& interval) const { return kernels_.size(interval); }
+
+ private:
+   PackingKernels_T kernels_;
+};
+} // namespace internal
+
+/**
+ * Pack Info class template for lattice Boltzmann PDF fields. Relies on a code-generated
+ * class providing kernel implementations for packing, unpacking and local copying of data.
+ *
+ * This template relies on a PackingKernels implementation generated by lbmpy_walberla.packing_kernels.
+ * The code generated part provides the kernels for transferring data between communication buffers
+ * and fields. The iteration slices are constructed by this class.
+ *
+ * The code-generated substructure enables the usage of arbitrary, in particular in-place streaming
+ * patterns.
+ *
+ * @tparam  PdfField_T Type of the PDF field whose storage specification provides the
+ *          code-generated `PackKernels` implementation (`lbmpy_walberla.packing_kernels`).
+ *
+ * \ingroup lbm
+ */
+template< typename PdfField_T >
+class UniformGeneratedPdfPackInfo : public UniformPackInfo
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+   using Stencil      = typename LatticeStorageSpecification_T::Stencil;
+
+   /**
+    * Constructor.
+    *
+    * @param pdfFieldID ID of the associated walberla::lbm::PdfField
+    * @param cellLayersToSend The amount of cell layers that should be communicated
+    * @param sendAll If true, instead of only those populations streaming in subdirections of the communication
+    *                direction, all populations will always be communicated.
+    *                \warning Be careful when using this option with any streaming pattern other than
+    *                the pull pattern. Other patterns store at least some of their post-collision
+    *                populations in neighbouring cells. This might lead to out-of-bounds errors when
+    *                copying to the outermost ghost layer! Solve this by adding an additional ghost layer
+    *                as a safety margin.
+    */
+   UniformGeneratedPdfPackInfo(const BlockDataID pdfFieldID, cell_idx_t cellLayersToSend = 1, bool sendAll = false)
+      : pdfFieldID_(pdfFieldID), ghostLayersToSend_(cellLayersToSend), sendAll_(sendAll)
+   {}
+
+   // The set of communicated cells never changes between communication steps.
+   bool constantDataExchange() const override { return true; }
+   // Receiving is reported thread-safe — presumably because ghost regions for
+   // different directions do not overlap; verify against UniformPackInfo's contract.
+   bool threadsafeReceiving() const override { return true; }
+
+   void unpackData(IBlock * receiver, Direction dir, mpi::RecvBuffer & buffer) override;
+   void communicateLocal(const IBlock * sender, IBlock * receiver, Direction dir) override;
+
+ protected:
+   void packDataImpl(const IBlock * sender, Direction dir, mpi::SendBuffer & buffer) const override;
+
+ private:
+   // ID of the PDF field this pack info communicates.
+   const BlockDataID pdfFieldID_;
+   // Wrapper dispatching to the generated kernels; handles the time step for in-place patterns.
+   internal::UniformPackingKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_;
+   // Number of cell layers packed/copied per communication step (see constructor).
+   cell_idx_t ghostLayersToSend_;
+   // If true, all populations are communicated instead of only direction-relevant ones.
+   bool sendAll_;
+};
+
+/// Unpacks received data into the receiver's ghost region in direction dir.
+template< typename PdfField_T >
+void UniformGeneratedPdfPackInfo< PdfField_T >::unpackData( IBlock * receiver, Direction dir, mpi::RecvBuffer& buffer)
+{
+   auto pdfField = receiver->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ghostRegion;
+   pdfField->getGhostRegion(dir, ghostRegion, ghostLayersToSend_, false);
+
+   if (!sendAll_)
+   {
+      // Only the populations streaming in sub-directions of dir were sent.
+      uint_t const numBytes  = kernels_.size(ghostRegion, dir);
+      unsigned char* dataPtr = buffer.skip(numBytes);
+      kernels_.unpackDirection(pdfField, ghostRegion, dataPtr, dir);
+   }
+   else
+   {
+      unsigned char* dataPtr = buffer.skip(kernels_.size(ghostRegion));
+      kernels_.unpackAll(pdfField, ghostRegion, dataPtr);
+   }
+}
+
+/// Copies data directly between two process-local blocks without MPI buffers.
+template< typename PdfField_T >
+void UniformGeneratedPdfPackInfo< PdfField_T >::communicateLocal(const IBlock* sender, IBlock* receiver, Direction dir)
+{
+   auto srcField = const_cast< IBlock* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+   // Source: interior slice before the sender's ghost layer in dir.
+   // Destination: the receiver's ghost region in the inverse direction.
+   CellInterval srcRegion;
+   srcField->getSliceBeforeGhostLayer(dir, srcRegion, ghostLayersToSend_, false);
+   CellInterval dstRegion;
+   dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, ghostLayersToSend_, false);
+
+   if (!sendAll_)
+   {
+      kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir);
+   }
+   else
+   {
+      kernels_.localCopyAll(srcField, srcRegion, dstField, dstRegion);
+   }
+}
+
+/// Packs the sender's interior slice adjacent to the ghost layers in direction dir.
+template< typename PdfField_T>
+void UniformGeneratedPdfPackInfo< PdfField_T >::packDataImpl(const IBlock* sender, Direction dir, mpi::SendBuffer& buffer) const
+{
+   auto pdfField = const_cast< IBlock* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   CellInterval sliceInterval;
+   pdfField->getSliceBeforeGhostLayer(dir, sliceInterval, ghostLayersToSend_, false);
+
+   if (!sendAll_)
+   {
+      // Only the populations streaming in sub-directions of dir are packed.
+      unsigned char* dataPtr = buffer.forward(kernels_.size(sliceInterval, dir));
+      kernels_.packDirection(pdfField, sliceInterval, dataPtr, dir);
+   }
+   else
+   {
+      unsigned char* dataPtr = buffer.forward(kernels_.size(sliceInterval));
+      kernels_.packAll(pdfField, sliceInterval, dataPtr);
+   }
+}
+
+} // namespace lbm_generated
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/evaluation/CMakeLists.txt b/src/lbm_generated/evaluation/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..922cf93c3cb989af797b913baa227a2cf1735b23
--- /dev/null
+++ b/src/lbm_generated/evaluation/CMakeLists.txt
@@ -0,0 +1,4 @@
+target_sources( lbm_generated
+    PRIVATE
+    PerformanceEvaluation.h
+    )
diff --git a/src/lbm_generated/evaluation/PerformanceEvaluation.h b/src/lbm_generated/evaluation/PerformanceEvaluation.h
new file mode 100644
index 0000000000000000000000000000000000000000..9fb7e934a2506ca360af12882a0775bcf8281eb6
--- /dev/null
+++ b/src/lbm_generated/evaluation/PerformanceEvaluation.h
@@ -0,0 +1,415 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PerformanceEvaluation.h
+//! \ingroup lbm_generated
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/Hostname.h"
+#include "core/Set.h"
+#include "core/waLBerlaBuildInfo.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/logging/Logging.h"
+#include "core/mpi/MPIManager.h"
+#include "core/uid/SUID.h"
+
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/CellCounter.h"
+#include "field/FlagUID.h"
+
+#include <cstdlib>
+#include <map>
+#include <string>
+#include <sstream>
+
+
+namespace walberla::lbm_generated {
+
+
+//**********************************************************************************************************************
+/*!
+*   \brief Class for evaluating the performance of LBM simulations
+*/
+//**********************************************************************************************************************
+// Gathers per-level cell counts (total and fluid-only) via the two counter functors and
+// derives common LBM performance metrics: MLUPS / MFLUPS (per process / per core),
+// "virtual" variants that pretend the whole domain were refined to the finest level,
+// and time steps per second. Thread and core counts are read from the environment in
+// the constructor (see OMP_NUM_THREADS / THREADS_PER_CORE handling there).
+template< typename CellCounter_T, typename FluidCellCounter_T >
+class PerformanceEvaluationBase
+{
+public:
+
+   PerformanceEvaluationBase( const weak_ptr< StructuredBlockStorage > & blocks,
+                              const CellCounter_T & cellCounter, const FluidCellCounter_T & fluidCellCounter,
+                              const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
+                              const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() );
+   
+   // Re-reads the number of levels from the block storage and re-runs both cell counters.
+   // Call after the domain or geometry changed.
+   void refresh();
+
+   void logResultOnRoot( const uint_t timeSteps, const double time ) const
+   {
+      WALBERLA_LOG_RESULT_ON_ROOT( "Simulation performance:\n" << loggingString( timeSteps, time ) )
+   }
+
+   void logInfoOnRoot( const uint_t timeSteps, const double time ) const
+   {
+      WALBERLA_LOG_INFO_ON_ROOT( "Simulation performance:\n" << loggingString( timeSteps, time ) )
+   }
+
+   // Human-readable multi-line performance report (see definition below).
+   std::string loggingString( const uint_t timeSteps, const double time ) const;
+   
+   void getResultsForSQLOnRoot( std::map< std::string, int > &         integerProperties,
+                                std::map< std::string, double > &      realProperties,
+                                std::map< std::string, std::string > & stringProperties,
+                                const uint_t timeSteps, const double time );
+   
+   static int processes() { return mpi::MPIManager::instance()->numProcesses(); }
+
+   int threads() const { return processes() * threadsPerProcess_; }
+   // Returns 0 when THREADS_PER_CORE was not set, i.e. the core count is unknown.
+   int cores()   const { return ( threadsPerCore_ == 0 ) ? 0 : ( threads() / threadsPerCore_ ); }
+
+   // Number of cells the domain would contain if everything were refined to the finest
+   // level: each cell on level i stands for 8^(finest - i) fine cells (3D octree refinement).
+   uint64_t allFineCells() const
+   {
+      uint64_t c( uint64_t(0) );
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+         c += cells_.numberOfCells(i) * uint64_c( math::uintPow8( levels_ - uint_t(1) - i ) );
+      return c;
+   }
+
+   // Million lattice cell updates per second. Level i performs timeSteps * 2^i updates
+   // per coarse time step, weighted by the number of cells on that level.
+   double mlups( const uint_t timeSteps, const double time ) const
+   {
+      double m( 0.0 );
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+         m += double_c( timeSteps * math::uintPow2(i) ) * double_c( cells_.numberOfCells(i) );
+      return m / ( time * 1000000.0 );
+   }
+
+   double mlupsPerProcess( const uint_t timeSteps, const double time ) const
+   {
+      return mlups( timeSteps, time ) / processes();
+   }
+
+   // 0.0 when the core count is unknown (THREADS_PER_CORE unset).
+   double mlupsPerCore( const uint_t timeSteps, const double time ) const
+   {
+      return ( cores() == 0 ) ? 0.0 : ( mlups( timeSteps, time ) / cores() );
+   }
+
+   // "Virtual" MLUPS: the performance the run would correspond to if every cell were a
+   // fine cell updated at the finest-level rate (counts 8^(finest - i) fine cells per
+   // level-i cell and timeSteps * 2^(finest) updates).
+   double vMlups( const uint_t timeSteps, const double time ) const
+   {
+      double m( 0.0 );
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+         m += double_c( timeSteps * math::uintPow2( levels_ - uint_t(1) ) ) *
+              double_c( uint64_c( math::uintPow8( levels_ - uint_t(1) - i ) ) * cells_.numberOfCells(i) );
+      return m / ( time * 1000000.0 );
+   }
+
+   double vMlupsPerProcess( const uint_t timeSteps, const double time ) const
+   {
+      return vMlups( timeSteps, time ) / processes();
+   }
+
+   double vMlupsPerCore( const uint_t timeSteps, const double time ) const
+   {
+      return ( cores() == 0 ) ? 0.0 : ( vMlups( timeSteps, time ) / cores() );
+   }
+
+   // Same as mlups() but restricted to fluid cells (MFLUPS).
+   double mflups( const uint_t timeSteps, const double time ) const
+   {
+      double m( 0.0 );
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+         m += double_c( timeSteps * math::uintPow2(i) ) * double_c( fluidCells_.numberOfCells(i) );
+      return m / ( time * 1000000.0 );
+   }
+
+   double mflupsPerProcess( const uint_t timeSteps, const double time ) const
+   {
+      return mflups( timeSteps, time ) / processes();
+   }
+
+   double mflupsPerCore( const uint_t timeSteps, const double time ) const
+   {
+      return ( cores() == 0 ) ? 0.0 : ( mflups( timeSteps, time ) / cores() );
+   }
+
+   // Same as vMlups() but restricted to fluid cells.
+   double vMflups( const uint_t timeSteps, const double time ) const
+   {
+      double m( 0.0 );
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+         m += double_c( timeSteps * math::uintPow2( levels_ - uint_t(1) ) ) *
+              double_c( uint64_c( math::uintPow8( levels_ - uint_t(1) - i ) ) * fluidCells_.numberOfCells(i) );
+      return m / ( time * 1000000.0 );
+   }
+
+   double vMflupsPerProcess( const uint_t timeSteps, const double time ) const
+   {
+      return vMflups( timeSteps, time ) / processes();
+   }
+
+   double vMflupsPerCore( const uint_t timeSteps, const double time ) const
+   {
+      return ( cores() == 0 ) ? 0.0 : ( vMflups( timeSteps, time ) / cores() );
+   }
+
+   static double timeStepsPerSecond( const uint_t timeSteps, const double time ) { return double_c( timeSteps ) / time; }
+
+   // Time steps per second on the finest level (2^(levels-1) fine steps per coarse step).
+   double fineTimeStepsPerSecond( const uint_t timeSteps, const double time ) const
+   {
+      return double_c( timeSteps * math::uintPow2( levels_ - uint_t(1) ) ) / time;
+   }
+
+private:
+
+   int threadsPerProcess_{ 1 };  // from OMP_NUM_THREADS when built with OpenMP, else 1
+   int threadsPerCore_{ 0 };     // from THREADS_PER_CORE; 0 means "unknown"
+
+   weak_ptr< StructuredBlockStorage > blocks_;  // weak: evaluation must not keep the domain alive
+   uint_t levels_;                              // number of refinement levels, set by refresh()
+
+   CellCounter_T cells_;       // counts all cells per level
+   FluidCellCounter_T fluidCells_;  // counts fluid cells per level
+
+}; // class PerformanceEvaluationBase
+
+
+
+//**********************************************************************************************************************
+/*!
+*   \brief Class for evaluating the performance of LBM simulations using fields
+*
+*   Assumes that in-between creating an object of this class and calling any of the member functions the number of cells
+*   and the number of fluid cells do not change! For simulations with static geometry, this is always the case.
+*/
+//**********************************************************************************************************************
+template< typename FlagField_T >
+// Convenience specialisation that counts cells via a flag field: the first counter
+// (all cells) uses an empty flag set, the second counts only cells flagged as 'fluid'.
+class PerformanceEvaluation : public PerformanceEvaluationBase< field::CellCounter< FlagField_T >, field::CellCounter< FlagField_T > >
+{
+public:
+   PerformanceEvaluation( const weak_ptr< StructuredBlockStorage > & blocks,
+                          const ConstBlockDataID & flagFieldId, const Set< FlagUID > & fluid,
+                          const Set<SUID> & requiredSelectors = Set<SUID>::emptySet(),
+                          const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet() )
+                          : PerformanceEvaluationBase< field::CellCounter< FlagField_T >, field::CellCounter< FlagField_T > >(
+                              blocks,
+                              // empty flag set -> counts every cell
+                              field::CellCounter< FlagField_T >( blocks, flagFieldId, Set< FlagUID >::emptySet(), requiredSelectors, incompatibleSelectors ),
+                              // 'fluid' flags -> counts fluid cells only
+                              field::CellCounter< FlagField_T >( blocks, flagFieldId, fluid, requiredSelectors, incompatibleSelectors ),
+                              requiredSelectors, incompatibleSelectors )
+   {
+   }
+};
+
+
+// Stores the counters, reads thread configuration from the environment and performs an
+// initial refresh(). Aborts when built with OpenMP but OMP_NUM_THREADS is unset, because
+// the thread count would otherwise be unknown for the per-thread/per-core metrics.
+// The selector parameters are accepted for interface compatibility but unused here.
+template< typename CellCounter_T, typename FluidCellCounter_T >
+PerformanceEvaluationBase< CellCounter_T, FluidCellCounter_T >::PerformanceEvaluationBase(
+                                                                   const weak_ptr< StructuredBlockStorage > & blocks,
+                                                                   const CellCounter_T & cellCounter, const FluidCellCounter_T & fluidCellCounter,
+                                                                   const Set<SUID> & /*requiredSelectors*/, const Set<SUID> & /*incompatibleSelectors*/ )
+   : blocks_( blocks ),
+     cells_( cellCounter ),
+     fluidCells_( fluidCellCounter )
+{
+#ifdef _OPENMP
+   // NOTE(review): prefer nullptr over NULL; also std::atoi silently yields 0 for a
+   // non-numeric value — consider validating the parsed thread count.
+   if( std::getenv( "OMP_NUM_THREADS" ) == NULL )
+      WALBERLA_ABORT( "If you are using a version of the program that was compiled with OpenMP you have to "
+                      "specify the environment variable \'OMP_NUM_THREADS\' accordingly!" );
+   threadsPerProcess_ = std::atoi( std::getenv( "OMP_NUM_THREADS" ) );
+#endif
+
+   // Optional: enables the per-core metrics; threadsPerCore_ stays 0 ("unknown") otherwise.
+   if( std::getenv( "THREADS_PER_CORE" ) )
+      threadsPerCore_ = std::atoi( std::getenv( "THREADS_PER_CORE" ) );
+
+   refresh();
+}
+
+
+
+// Re-reads the level count from the (still alive) block storage and re-runs both cell
+// counters. Aborts via WALBERLA_CHECK if the block storage has already been destroyed.
+template< typename CellCounter_T, typename FluidCellCounter_T >
+void PerformanceEvaluationBase< CellCounter_T, FluidCellCounter_T >::refresh()
+{
+   auto blocks = blocks_.lock();
+   WALBERLA_CHECK_NOT_NULLPTR( blocks, "Trying to access 'PerformanceEvaluation' for a block storage object that doesn't exist anymore" )
+   
+   levels_ = blocks->getNumberOfLevels();
+   
+   // The counter functors recount on invocation of operator().
+   cells_();
+   fluidCells_();
+}
+
+
+
+// Builds the multi-line, human-readable performance report: process/thread/core setup,
+// cell statistics (with per-level breakdown when refinement is used), MLUPS/MFLUPS
+// figures, and build/run information. Core-dependent figures are printed as "n/a *)"
+// with an explanatory footnote when THREADS_PER_CORE is unset (threadsPerCore_ == 0).
+template< typename CellCounter_T, typename FluidCellCounter_T >
+std::string PerformanceEvaluationBase< CellCounter_T, FluidCellCounter_T >::loggingString( const uint_t timeSteps, const double time ) const
+{
+   std::ostringstream oss;
+
+   // Placeholder for values that require a known core count; "*)" refers to the footnote.
+   std::string na( "n/a *)" );
+
+   std::ostringstream threadsPerCoreString;
+   threadsPerCoreString << threadsPerCore_;
+
+   std::ostringstream coresString;
+   coresString << cores();
+
+   oss <<   "- processes:   " << processes()
+      << "\n- threads:     " << threads() << " (threads per process = " << threadsPerProcess_
+      << ", threads per core = " << ( ( threadsPerCore_ == 0 ) ? na : threadsPerCoreString.str() ) << ")"
+      << "\n- cores:       " << ( ( threadsPerCore_ == 0 ) ? na : coresString.str() )
+      << "\n- time steps:  " << timeSteps;
+
+   // With refinement, 'timeSteps' counts coarse steps; report the fine-level count too.
+   if( levels_ > uint_t(1) )
+   {
+      oss << " (on the coarsest grid, " << ( timeSteps * math::uintPow2( levels_ - uint_t(1) ) ) << " on the finest grid)";
+   }
+
+   oss << "\n- time:        " << time << " sec"
+      << "\n- cells:       " << cells_.numberOfCells();
+
+   if( levels_ > uint_t(1) )
+   {
+      oss << " (" << allFineCells() << " if everything were fine -> data reduction by factor of "
+         << ( real_c( allFineCells() ) / real_c( cells_.numberOfCells() ) ) << ")";
+   }
+
+   oss << "\n- fluid cells: " << fluidCells_.numberOfCells() << " ("
+      << ( real_c(100) * real_c( fluidCells_.numberOfCells() ) / real_c( cells_.numberOfCells() ) ) << " % of all cells)";
+
+   if( levels_ > uint_t(1) )
+   {
+      oss << "\n- distribution of cells to different grid levels:";
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+         oss << "\n   + level " << i <<": " << cells_.numberOfCells(i) << " cells (" << fluidCells_.numberOfCells(i) << " fluid cells = "
+         << ( real_c(100) * real_c( fluidCells_.numberOfCells(i) ) / real_c( cells_.numberOfCells(i) ) )
+         << " % of all cells on this level)";
+   }
+
+   std::ostringstream mlupsPerCoreString;
+   mlupsPerCoreString << mlupsPerCore( timeSteps, time );
+
+   std::ostringstream mflupsPerCoreString;
+   mflupsPerCoreString << mflupsPerCore( timeSteps, time );
+
+   oss << "\n- performance: " << mlups( timeSteps, time ) << " MLUPS (million lattice cell updates per second)"
+      << "\n               " << mlupsPerProcess( timeSteps, time ) << " MLUPS / process"
+      << "\n               " << ( ( threadsPerCore_ == 0 ) ? na : mlupsPerCoreString.str() ) << " MLUPS / core"
+      << "\n               " << mflups( timeSteps, time ) << " MFLUPS (million fluid lattice cell updates per second)"
+      << "\n               " << mflupsPerProcess( timeSteps, time ) << " MFLUPS / process"
+      << "\n               " << ( ( threadsPerCore_ == 0 ) ? na : mflupsPerCoreString.str() ) << " MFLUPS / core"
+      << "\n               " << timeStepsPerSecond( timeSteps, time ) << " time steps / second";
+
+   // "Virtual" figures are only meaningful when refinement is actually in use.
+   if( levels_ > uint_t(1) )
+   {
+      std::ostringstream vMlupsPerCoreString;
+      vMlupsPerCoreString << vMlupsPerCore( timeSteps, time );
+
+      std::ostringstream vMflupsPerCoreString;
+      vMflupsPerCoreString << vMflupsPerCore( timeSteps, time );
+
+      oss << "\n- 'virtual' performance (if everything were fine): " << vMlups( timeSteps, time ) << " MLUPS (million lattice cell updates per second)"
+         << "\n                                                   " << vMlupsPerProcess( timeSteps, time ) << " MLUPS / process"
+         << "\n                                                   " << ( ( threadsPerCore_ == 0 ) ? na : vMlupsPerCoreString.str() ) << " MLUPS / core"
+         << "\n                                                   " << vMflups( timeSteps, time ) << " MFLUPS (million fluid lattice cell updates per second)"
+         << "\n                                                   " << vMflupsPerProcess( timeSteps, time ) << " MFLUPS / process"
+         << "\n                                                   " << ( ( threadsPerCore_ == 0 ) ? na : vMflupsPerCoreString.str() ) << " MFLUPS / core"
+         << "\n                                                   " << fineTimeStepsPerSecond( timeSteps, time ) << " fine time steps / second";
+   }
+
+   oss << "\n- build / run information:"
+      << "\n   + host machine:   " << getHostName()
+      << "\n   + build machine:  " << WALBERLA_BUILD_MACHINE
+      << "\n   + git SHA1:       " << WALBERLA_GIT_SHA1
+      << "\n   + build type:     " << WALBERLA_BUILD_TYPE
+      << "\n   + compiler flags: " << WALBERLA_COMPILER_FLAGS;
+
+   if( threadsPerCore_ == 0 )
+      oss << "\n\n  *) only available if environment variable 'THREADS_PER_CORE' is set";
+
+   return oss.str();
+}
+
+
+
+// Fills the three property maps (for SQL/database export) with the same figures that
+// loggingString() reports. Executes only on the MPI root process; on all other
+// processes the maps are left untouched. Refinement-specific entries (fineTimeSteps,
+// per-level cell counts, vM*LUPS figures) are added only when more than one level exists.
+template< typename CellCounter_T, typename FluidCellCounter_T >
+void PerformanceEvaluationBase< CellCounter_T, FluidCellCounter_T >::getResultsForSQLOnRoot( std::map< std::string, int > &         integerProperties,
+                                                                                             std::map< std::string, double > &      realProperties,
+                                                                                             std::map< std::string, std::string > & stringProperties,
+                                                                                             const uint_t timeSteps, const double time )
+{
+   // Early-out on every process except root.
+   WALBERLA_NON_ROOT_SECTION()
+   {
+      return;
+   }
+
+   integerProperties[ "levels" ]            = int_c( levels_ );
+   integerProperties[ "processes" ]         = processes();
+   integerProperties[ "threads" ]           = threads();
+   integerProperties[ "cores" ]             = cores();
+   integerProperties[ "threadsPerProcess" ] = threadsPerProcess_;
+   integerProperties[ "threadsPerCore" ]    = threadsPerCore_;
+
+   integerProperties[ "timeSteps" ] = int_c( timeSteps );
+   if( levels_ > uint_t(1) )
+      integerProperties[ "fineTimeSteps" ] = int_c( timeSteps * math::uintPow2( levels_ - uint_t(1) ) );
+
+   realProperties[ "time" ] = real_c( time );
+
+   // Cell counts are stored as reals: they can exceed the int range for large domains.
+   realProperties[ "cells" ] = real_c( cells_.numberOfCells() );
+   if( levels_ > uint_t(1) )
+      realProperties[ "refinementCellsReduction" ] = real_c( allFineCells() ) / real_c( cells_.numberOfCells() );
+   realProperties[ "fluidCells" ] = real_c( fluidCells_.numberOfCells() );
+
+   if( levels_ > uint_t(1) )
+   {
+      // Per-level breakdown: keys "cells_<level>" and "fluidCells_<level>".
+      for( uint_t i = uint_t(0); i < levels_; ++i )
+      {
+         std::ostringstream cells_i;
+         std::ostringstream fluidCells_i;
+
+         cells_i << "cells_" << i;
+         fluidCells_i << "fluidCells_" << i;
+
+         realProperties[ cells_i.str() ] = real_c( cells_.numberOfCells(i) );
+         realProperties[ fluidCells_i.str() ] = real_c( fluidCells_.numberOfCells(i) );
+      }
+   }
+
+   realProperties[ "MLUPS" ]              = double_c( mlups( timeSteps, time ) );
+   realProperties[ "MLUPS_process" ]      = double_c( mlupsPerProcess( timeSteps, time ) );
+   realProperties[ "MLUPS_core" ]         = double_c( mlupsPerCore( timeSteps, time ) );
+   realProperties[ "MFLUPS" ]             = double_c( mflups( timeSteps, time ) );
+   realProperties[ "MFLUPS_process" ]     = double_c( mflupsPerProcess( timeSteps, time ) );
+   realProperties[ "MFLUPS_core" ]        = double_c( mflupsPerCore( timeSteps, time ) );
+   realProperties[ "timeStepsPerSecond" ] = double_c( timeStepsPerSecond( timeSteps, time ) );
+
+   if( levels_ > uint_t(1) )
+   {
+      realProperties[ "vMLUPS" ]                 = double_c( vMlups( timeSteps, time ) );
+      realProperties[ "vMLUPS_process" ]         = double_c( vMlupsPerProcess( timeSteps, time ) );
+      realProperties[ "vMLUPS_core" ]            = double_c( vMlupsPerCore( timeSteps, time ) );
+      realProperties[ "vMFLUPS" ]                = double_c( vMflups( timeSteps, time ) );
+      realProperties[ "vMFLUPS_process" ]        = double_c( vMflupsPerProcess( timeSteps, time ) );
+      realProperties[ "vMFLUPS_core" ]           = double_c( vMflupsPerCore( timeSteps, time ) );
+      realProperties[ "fineTimeStepsPerSecond" ] = double_c( fineTimeStepsPerSecond( timeSteps, time ) );
+   }
+
+   stringProperties[ "hostMachine" ]   = std::string( getHostName() );
+   stringProperties[ "buildMachine" ]  = std::string( WALBERLA_BUILD_MACHINE );
+   stringProperties[ "gitVersion" ]    = std::string( WALBERLA_GIT_SHA1 );
+   stringProperties[ "buildType" ]     = std::string( WALBERLA_BUILD_TYPE );
+   stringProperties[ "compilerFlags" ] = std::string( WALBERLA_COMPILER_FLAGS );
+}
+
+} // namespace walberla::lbm_generated
diff --git a/src/lbm_generated/field/AddToStorage.h b/src/lbm_generated/field/AddToStorage.h
new file mode 100644
index 0000000000000000000000000000000000000000..afb86819931238443443f3095f73880aec401d36
--- /dev/null
+++ b/src/lbm_generated/field/AddToStorage.h
@@ -0,0 +1,207 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file AddToStorage.h
+//! \ingroup lbm_generated
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "PdfField.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/debug/Debug.h"
+#include "core/uid/SUID.h"
+#include "field/blockforest/BlockDataHandling.h"
+
+namespace walberla::lbm_generated {
+
+namespace internal {
+   
+// Block-data handling for generated PdfFields: allocates a field sized to each block and
+// delegates (de)serialisation to the generic field::BlockDataHandling base. The second
+// base-class template argument enables 2D handling when the stencil is two-dimensional.
+template< typename LatticeStorageSpecification_T >
+class PdfFieldHandling : public field::BlockDataHandling< PdfField<LatticeStorageSpecification_T>,
+                                                          LatticeStorageSpecification_T::Stencil::D == 2 >
+{
+public:
+
+   using PdfField_T = PdfField<LatticeStorageSpecification_T>;
+   using Base_T = field::BlockDataHandling<PdfField_T, LatticeStorageSpecification_T::Stencil::D == 2>;
+
+   // 'alloc' may be nullptr, in which case the field uses its default allocator.
+   PdfFieldHandling( const weak_ptr< StructuredBlockStorage > & blocks, const LatticeStorageSpecification_T & storageSpecification,
+                     const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr ) :
+      blocks_( blocks ), storageSpecification_( storageSpecification ),
+      nrOfGhostLayers_( nrOfGhostLayers ), layout_( layout ), alloc_( alloc ){}
+
+   // The following overrides simply forward to the base implementation; they are spelled
+   // out explicitly so this class fully implements the (de)serialisation interface.
+   inline void serialize( IBlock * const block, const BlockDataID & id, mpi::SendBuffer & buffer ) override
+   {
+      Base_T::serialize( block, id, buffer );
+   }
+
+   void serializeCoarseToFine( Block * const block, const BlockDataID & id, mpi::SendBuffer & buffer, const uint_t child ) override
+   {
+      Base_T::serializeCoarseToFine( block, id, buffer, child );
+   }
+
+   void serializeFineToCoarse( Block * const block, const BlockDataID & id, mpi::SendBuffer & buffer ) override
+   {
+      Base_T::serializeFineToCoarse( block, id, buffer );
+   }
+
+   void deserialize( IBlock * const block, const BlockDataID & id, mpi::RecvBuffer & buffer ) override
+   {
+      Base_T::deserialize( block, id, buffer );
+   }
+
+   void deserializeCoarseToFine( Block * const block, const BlockDataID & id, mpi::RecvBuffer & buffer ) override
+   {
+      Base_T::deserializeCoarseToFine( block, id, buffer );
+   }
+
+   void deserializeFineToCoarse( Block * const block, const BlockDataID & id, mpi::RecvBuffer & buffer, const uint_t child ) override
+   {
+      Base_T::deserializeFineToCoarse( block, id, buffer, child );
+   }
+
+protected:
+
+   PdfField<LatticeStorageSpecification_T> * allocate( IBlock * const block ) override
+   {
+      return allocateDispatch( block );
+   }
+
+   PdfField<LatticeStorageSpecification_T> * reallocate( IBlock * const block ) override
+   {
+      return allocateDispatch( block );
+   }
+
+private:
+
+
+   // Creates a new PdfField sized to the given block. Ownership of the returned raw
+   // pointer passes to the block-data framework. Aborts if the block storage is gone.
+   PdfField<LatticeStorageSpecification_T> * allocateDispatch( IBlock * const block )
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR( block )
+
+      auto blocks = blocks_.lock();
+      WALBERLA_CHECK_NOT_NULLPTR( blocks )
+
+      return new PdfField_T( blocks->getNumberOfXCells( *block ), blocks->getNumberOfYCells( *block ), blocks->getNumberOfZCells( *block ),
+                            storageSpecification_, nrOfGhostLayers_, layout_, alloc_ );
+   }
+
+   weak_ptr< StructuredBlockStorage > blocks_;  // weak: handling must not keep the storage alive
+   LatticeStorageSpecification_T    storageSpecification_;
+
+   uint_t            nrOfGhostLayers_;
+   field::Layout     layout_;
+   shared_ptr< field::FieldAllocator<real_t> > alloc_;  // may be nullptr (default allocator)
+
+}; // class PdfFieldHandling
+
+} // namespace internal
+
+
+
+// Registers a PdfField as block data with explicit ghost-layer count; all remaining
+// parameters have defaults. Returns the BlockDataID under which the field is stored.
+template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
+BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
+                                  const LatticeStorageSpecification_T & storageSpecification,
+                                  const uint_t ghostLayers,
+                                  const field::Layout & layout = field::fzyx,
+                                  const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
+                                  const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
+                                  const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+{
+   return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
+                                   blocks, storageSpecification, ghostLayers, layout, alloc ),
+                                identifier, requiredSelectors, incompatibleSelectors );
+}
+
+// Overload without a ghost-layer parameter: uses one ghost layer. Needed as a separate
+// overload because defaulting 'ghostLayers' alongside 'layout' would make calls ambiguous.
+template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
+BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
+                                 const LatticeStorageSpecification_T & storageSpecification,
+                                 const field::Layout & layout = field::fzyx,
+                                 const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
+                                 const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
+                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+{
+   auto ghostLayers = uint_c(1);
+
+   return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
+                                  blocks, storageSpecification, ghostLayers, layout, alloc ),
+                               identifier, requiredSelectors, incompatibleSelectors );
+}
+
+// Overload taking only block selectors: uses one ghost layer and fzyx layout.
+template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
+BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
+                                 const LatticeStorageSpecification_T & storageSpecification,
+                                 const Set<SUID> & requiredSelectors     = Set<SUID>::emptySet(),
+                                 const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
+                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+{
+   auto ghostLayers = uint_c(1);
+   auto layout = field::fzyx;
+
+   return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
+                                  blocks, storageSpecification, ghostLayers, layout, alloc ),
+                               identifier, requiredSelectors, incompatibleSelectors );
+}
+
+// Overload taking only a custom allocator: one ghost layer, fzyx layout, no selectors.
+template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
+BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
+                                 const LatticeStorageSpecification_T & storageSpecification,
+                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+{
+   auto ghostLayers = uint_c(1);
+   auto layout = field::fzyx;
+   auto requiredSelectors = Set<SUID>::emptySet();
+   auto incompatibleSelectors = Set<SUID>::emptySet();
+
+   return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
+                                  blocks, storageSpecification, ghostLayers, layout, alloc ),
+                               identifier, requiredSelectors, incompatibleSelectors );
+}
+
+// Overload taking layout and allocator: one ghost layer, no selectors.
+template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
+BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
+                                 const LatticeStorageSpecification_T & storageSpecification,
+                                 const field::Layout & layout = field::fzyx,
+                                 const shared_ptr< field::FieldAllocator<real_t> > alloc = nullptr)
+{
+   auto ghostLayers = uint_c(1);
+   auto requiredSelectors = Set<SUID>::emptySet();
+   auto incompatibleSelectors = Set<SUID>::emptySet();
+
+   return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
+                                  blocks, storageSpecification, ghostLayers, layout, alloc ),
+                               identifier, requiredSelectors, incompatibleSelectors );
+}
+
+// Overload with explicit ghost layers, layout and allocator (all mandatory): selectors
+// default to empty sets.
+template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
+BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
+                                 const LatticeStorageSpecification_T & storageSpecification,
+                                 const uint_t ghostLayers,
+                                 const field::Layout & layout,
+                                 const shared_ptr< field::FieldAllocator<real_t> > alloc)
+{
+   auto requiredSelectors = Set<SUID>::emptySet();
+   auto incompatibleSelectors = Set<SUID>::emptySet();
+
+   return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
+                                  blocks, storageSpecification, ghostLayers, layout, alloc ),
+                               identifier, requiredSelectors, incompatibleSelectors );
+}
+
+
+} // namespace walberla::lbm_generated
diff --git a/src/lbm_generated/field/CMakeLists.txt b/src/lbm_generated/field/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..63bc11c8f920acc3e4c244488d72899fd7a24245
--- /dev/null
+++ b/src/lbm_generated/field/CMakeLists.txt
@@ -0,0 +1,5 @@
+target_sources( lbm_generated
+        PRIVATE
+        AddToStorage.h
+        PdfField.h
+        )
\ No newline at end of file
diff --git a/src/lbm_generated/field/PdfField.h b/src/lbm_generated/field/PdfField.h
new file mode 100644
index 0000000000000000000000000000000000000000..6e6b7ee88fd5e9ee0be1dbfb46da6d6e524d5536
--- /dev/null
+++ b/src/lbm_generated/field/PdfField.h
@@ -0,0 +1,136 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PdfField.h
+//! \ingroup lbm_generated
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+
+
+namespace walberla::lbm_generated {
+
+template< typename LatticeStorageSpecification_T >
+class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::Stencil::Size >
+{
+public:
+
+   //** Type Definitions  **********************************************************************************************
+   /*! \name Type Definitions */
+   //@{
+   using LatticeStorageSpecification = LatticeStorageSpecification_T;
+   using Stencil = typename LatticeStorageSpecification_T::Stencil;
+
+   using value_type = typename GhostLayerField<real_t, Stencil::Size>::value_type;
+
+   using Ptr = typename GhostLayerField<real_t, Stencil::Size>::Ptr;
+   using ConstPtr = typename GhostLayerField<real_t, Stencil::Size>::ConstPtr;
+   //@}
+   //*******************************************************************************************************************
+
+   PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize,
+            const LatticeStorageSpecification_T & storageSpecification,
+             const uint_t ghostLayers = uint_t(1), const field::Layout & _layout = field::zyxf,
+             const shared_ptr< field::FieldAllocator<real_t> > & alloc = shared_ptr< field::FieldAllocator<real_t> >() );
+
+   ~PdfField() override = default;
+
+   inline PdfField * clone()              const;
+   inline PdfField * cloneUninitialized() const;
+   inline PdfField * cloneShallowCopy()   const;
+
+
+   /////////////////////////////////////////////////
+   // Access functions (with stencil::Direction!) //
+   /////////////////////////////////////////////////
+
+   using GhostLayerField< real_t, Stencil::Size >::get;
+
+         real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d )       { return get( x, y, z, Stencil::idx[d] ); }
+   const real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
+         real_t & get( const Cell & c, stencil::Direction d )       { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+   const real_t & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+
+   using GhostLayerField< real_t, Stencil::Size >::operator();
+
+         real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d )       { return get( x, y, z, Stencil::idx[d] ); }
+   const real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
+         real_t & operator()( const Cell & c, stencil::Direction d )       { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+   const real_t & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+
+
+protected:
+   //** Shallow Copy ***************************************************************************************************
+   /*! \name Shallow Copy */
+   //@{
+   inline PdfField( const PdfField< LatticeStorageSpecification_T > & other );
+   Field< real_t, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); }
+   //@}
+   //*******************************************************************************************************************
+
+   LatticeStorageSpecification_T storageSpecification_;
+};
+
+
+
+template< typename LatticeStorageSpecification_T >
+PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize,
+                                                    const LatticeStorageSpecification_T & storageSpecification,
+                                      const uint_t ghostLayers, const field::Layout & _layout,
+                                      const shared_ptr< field::FieldAllocator<real_t> > & alloc ) :
+
+   GhostLayerField< real_t, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ),
+      storageSpecification_( storageSpecification )
+
+{
+#ifdef _OPENMP
+   this->setWithGhostLayer( real_t(0) ); // first-touch initialisation (NUMA-aware page placement)
+#else
+   this->setWithGhostLayer( real_t(0) );
+#endif
+}
+
+
+
+template< typename LatticeStorageSpecification_T >
+inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::clone() const
+{
+   return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::clone() );
+}
+
+template< typename LatticeStorageSpecification_T >
+inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneUninitialized() const
+{
+   return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneUninitialized() );
+}
+
+template< typename LatticeStorageSpecification_T >
+inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneShallowCopy() const
+{
+   return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneShallowCopy() );
+}
+
+template< typename LatticeStorageSpecification_T >
+inline PdfField< LatticeStorageSpecification_T >::PdfField( const PdfField< LatticeStorageSpecification_T > & other )
+   : GhostLayerField< real_t, Stencil::Size >::GhostLayerField( other ), storageSpecification_( other.storageSpecification_ )
+{
+}
+
+} // namespace walberla::lbm_generated
diff --git a/src/lbm_generated/gpu/AddToStorage.h b/src/lbm_generated/gpu/AddToStorage.h
new file mode 100644
index 0000000000000000000000000000000000000000..ef8f28409709ad37244276e3b68269d0edcf19da
--- /dev/null
+++ b/src/lbm_generated/gpu/AddToStorage.h
@@ -0,0 +1,105 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file AddToStorage.h
+//! \ingroup lbm_generated
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/debug/CheckFunctions.h"
+#include "core/debug/Debug.h"
+#include "core/uid/SUID.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/FieldCopy.h"
+
+#include "field/blockforest/BlockDataHandling.h"
+
+#include "GPUPdfField.h"
+
+namespace walberla::lbm_generated
+{
+
+namespace internal
+{
+
+template< typename LatticeStorageSpecification_T>
+GPUPdfField< LatticeStorageSpecification_T > * createGPUPdfField( const IBlock * const block,
+                           const StructuredBlockStorage * const bs,
+                           const LatticeStorageSpecification_T& storageSpecification,
+                           const uint_t ghostLayers,
+                           const field::Layout & layout,
+                           const bool usePitchedMem )
+{
+   using GPUField_T = GPUPdfField< LatticeStorageSpecification_T >;
+
+   auto gpuField = new GPUField_T(bs->getNumberOfXCells( *block ),
+                                  bs->getNumberOfYCells( *block ),
+                                  bs->getNumberOfZCells( *block ),
+                                  storageSpecification, ghostLayers,
+                                  layout, usePitchedMem);
+
+   return gpuField;
+}
+
+template< typename Field_T, typename LatticeStorageSpecification_T >
+GPUPdfField< LatticeStorageSpecification_T >*
+   createGPUPdfFieldFromCPUPdfField(const IBlock* const block, const StructuredBlockStorage* const,
+                                    const LatticeStorageSpecification_T& storageSpecification,
+                                    ConstBlockDataID cpuFieldID, const bool usePitchedMem, const bool copyCPUField = true)
+{
+   using GPUField_T = GPUPdfField< LatticeStorageSpecification_T >;
+
+   const Field_T* f = block->getData< Field_T >(cpuFieldID);
+
+   auto gpuField = new GPUField_T(f->xSize(), f->ySize(), f->zSize(), storageSpecification, f->nrOfGhostLayers(),
+                                  f->layout(), usePitchedMem);
+
+   if (copyCPUField)
+      gpu::fieldCpy(*gpuField, *f);
+
+   return gpuField;
+}
+
+} // namespace internal
+
+template< typename GPUField_T, typename LatticeStorageSpecification_T >
+BlockDataID addGPUPdfFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs,
+                                    const std::string & identifier,
+                                    const LatticeStorageSpecification_T& storageSpecification,
+                                    const Layout layout = fzyx,
+                                    const uint_t nrOfGhostLayers = 1,
+                                    const bool usePitchedMem = true )
+{
+
+   auto func = std::bind(internal::createGPUPdfField< LatticeStorageSpecification_T >,
+                         std::placeholders::_1, std::placeholders::_2, storageSpecification, nrOfGhostLayers, layout, usePitchedMem);
+   return bs->addStructuredBlockData< GPUPdfField< LatticeStorageSpecification_T > >(func, identifier);
+}
+
+template< typename Field_T, typename LatticeStorageSpecification_T >
+BlockDataID addGPUPdfFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, ConstBlockDataID cpuFieldID,
+                                    const LatticeStorageSpecification_T& storageSpecification,
+                                    const std::string& identifier, const bool usePitchedMem = true, const bool copyCPUField = true)
+{
+   auto func = std::bind(internal::createGPUPdfFieldFromCPUPdfField< Field_T, LatticeStorageSpecification_T >,
+                         std::placeholders::_1, std::placeholders::_2, storageSpecification, cpuFieldID, usePitchedMem, copyCPUField);
+   return bs->addStructuredBlockData< GPUPdfField< LatticeStorageSpecification_T > >(func, identifier);
+}
+
+} // namespace walberla::lbm_generated
\ No newline at end of file
diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a082d34196c1b7a473956f6f805a2a09b535eb3
--- /dev/null
+++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
@@ -0,0 +1,108 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BasicRecursiveTimeStepGPU.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/NonUniformGPUScheme.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include <utility>
+
+#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
+
+namespace walberla
+{
+
+using gpu::communication::NonUniformGPUScheme;
+
+namespace lbm_generated
+{
+
+/**
+ *
+ * @tparam LatticeStorageSpecification_T   Generated storage specification
+ * @tparam SweepCollection_T LBM SweepCollection (must be able to call stream, collide, streamCollide and
+ * streamOnlyNoAdvancement)
+ * @tparam BoundaryCollection_T LBM Boundary collection (Functor that runs all boundary kernels at call)
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+class BasicRecursiveTimeStepGPU
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using Stencil                       = typename LatticeStorageSpecification_T::Stencil;
+   using CommunicationStencil          = typename LatticeStorageSpecification_T::CommunicationStencil;
+
+   using CommScheme = gpu::communication::NonUniformGPUScheme< CommunicationStencil >;
+   using PackInfo   = lbm_generated::NonuniformGeneratedGPUPdfPackInfo< PdfField_T >;
+
+   BasicRecursiveTimeStepGPU(std::shared_ptr< StructuredBlockForest >& sbfs, const BlockDataID& pdfFieldId,
+                             SweepCollection_T& sweepCollection, BoundaryCollection_T& boundaryCollection,
+                             std::shared_ptr< CommScheme >& commScheme, std::shared_ptr< PackInfo >& pdfFieldPackInfo)
+      : sbfs_(sbfs), pdfFieldId_(pdfFieldId), pdfFieldPackInfo_(pdfFieldPackInfo), commScheme_(commScheme),
+        sweepCollection_(sweepCollection), boundaryCollection_(boundaryCollection)
+   {
+#ifndef NDEBUG
+      for (auto& block : *sbfs)
+         WALBERLA_ASSERT(block.isDataOfType< PdfField_T >(pdfFieldId_),
+                         "Template parameter PdfField_T is of different type than BlockDataID pdfFieldId that is "
+                         "provided as constructor argument")
+#endif
+      maxLevel_ = sbfs->getDepth();
+
+      for (uint_t level = 0; level <= maxLevel_; level++)
+      {
+         std::vector< Block* > blocks;
+         sbfs->getBlocks(blocks, level);
+         blocks_.push_back(blocks);
+      }
+   };
+
+   ~BasicRecursiveTimeStepGPU() = default;
+
+   void operator()() { timestep(0); };
+   void addRefinementToTimeLoop(timeloop::SweepTimeloop& timeloop, uint_t level = 0);
+   void test(uint_t maxLevel, uint_t level = 0);
+
+ private:
+   void timestep(uint_t level);
+   void ghostLayerPropagation(Block* block, gpuStream_t gpuStream);
+   std::function< void() > executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation = false);
+
+   std::function< void() > executeBoundaryHandlingOnLevel(uint_t level);
+
+   std::shared_ptr< StructuredBlockForest > sbfs_;
+   uint_t maxLevel_;
+   std::vector< std::vector< Block* > > blocks_;
+
+   const BlockDataID pdfFieldId_;
+   std::shared_ptr< PackInfo > pdfFieldPackInfo_;
+   std::shared_ptr< CommScheme > commScheme_;
+
+   SweepCollection_T& sweepCollection_;
+   BoundaryCollection_T& boundaryCollection_;
+};
+
+} // namespace lbm_generated
+} // namespace walberla
+
+#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h"
diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..f7c5b28789d0976061190fb5367d101579cf8ded
--- /dev/null
+++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
@@ -0,0 +1,255 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BasicRecursiveTimeStep.impl.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "BasicRecursiveTimeStepGPU.h"
+
+namespace walberla {
+namespace lbm_generated {
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::timestep(uint_t level)
+{
+   std::vector<Block *> blocks;
+   sbfs_->getBlocks(blocks, level);
+
+   uint_t maxLevel = sbfs_->getDepth();
+
+   // 1.1 Collision
+   for(auto b: blocks){
+      sweepCollection_.streamCollide(b);
+   }
+
+   // 1.2 Recursive Descent
+   if(level < maxLevel){
+      timestep(level + 1);
+   }
+
+   // 1.3 Coarse to Fine Communication, receiving end
+   if(level != 0){
+      commScheme_->communicateCoarseToFine(level);
+   }
+
+   // 1.4 Equal-Level Communication
+   commScheme_->communicateEqualLevel(level);
+
+   // 1.5 Boundary Handling and Coalescence Preparation
+   for(auto b : blocks){
+      boundaryCollection_(b, nullptr);
+      if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+   }
+
+   // 1.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel){
+      commScheme_->communicateFineToCoarse(level + 1);
+   }
+
+   // Stop here if on coarsest level.
+   // Otherwise, continue to second subcycle.
+   if(level == 0) return;
+
+   // 2.1 Collision and Ghost-Layer Propagation
+   for(auto b: blocks){
+      ghostLayerPropagation(b, nullptr);  // GL-Propagation first without swapping arrays...
+      sweepCollection_.streamCollide(b);                // then Stream-Collide on interior, and swap arrays
+   }
+
+   // 2.2 Recursive Descent
+   if(level < maxLevel){
+      timestep(level + 1);
+   }
+
+   // 2.4 Equal-Level Communication
+   commScheme_->communicateEqualLevel(level);
+
+   // 2.5 Boundary Handling and Coalescence Preparation
+   for(auto b : blocks){
+      boundaryCollection_(b, nullptr);
+      if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+   }
+
+   // 2.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel){
+      commScheme_->communicateFineToCoarse(level + 1);
+   }
+}
+
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::addRefinementToTimeLoop(timeloop::SweepTimeloop & timeloop, uint_t level)
+{
+   // 1.1 Collision
+   timeloop.addFuncBeforeTimeStep(executeStreamCollideOnLevel(level), "Refinement Cycle: streamCollide on level " + std::to_string(level));
+
+   // 1.2 Recursive Descent
+   if(level < maxLevel_){
+      addRefinementToTimeLoop(timeloop, level + 1);
+   }
+
+   // 1.3 Coarse to Fine Communication, receiving end
+   if(level != 0){
+      timeloop.addFuncBeforeTimeStep(commScheme_->communicateCoarseToFineFunctor(level), "Refinement Cycle: communicate coarse to fine on level " + std::to_string(level));
+   }
+
+   // 1.4 Equal-Level Communication
+   timeloop.addFuncBeforeTimeStep(commScheme_->communicateEqualLevelFunctor(level), "Refinement Cycle: communicate equal level on level " + std::to_string(level));
+
+
+   // 1.5 Boundary Handling and Coalescence Preparation
+   timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+
+   // 1.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel_){
+      timeloop.addFuncBeforeTimeStep(commScheme_->communicateFineToCoarseFunctor(level + 1), "Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
+   }
+
+   // Stop here if on coarsest level.
+   // Otherwise, continue to second subcycle.
+   if(level == 0) return;
+
+   // 2.1 Collision and Ghost-Layer Propagation
+   timeloop.addFuncBeforeTimeStep(executeStreamCollideOnLevel(level, true), "Refinement Cycle: streamCollide with ghost layer propagation on level " + std::to_string(level));
+
+   // 2.2 Recursive Descent
+   if(level < maxLevel_)
+      addRefinementToTimeLoop(timeloop, level + 1);
+
+
+   // 2.4 Equal-Level Communication
+   timeloop.addFuncBeforeTimeStep(commScheme_->communicateEqualLevelFunctor(level), "Refinement Cycle: communicate equal level on level " + std::to_string(level));
+
+   // 2.5 Boundary Handling and Coalescence Preparation
+   timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+
+   // 2.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel_)
+      timeloop.addFuncBeforeTimeStep(commScheme_->communicateFineToCoarseFunctor(level + 1), "Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
+
+}
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::test(uint_t maxLevel, uint_t level)
+{
+   // 1.1 Collision
+   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide on level " + std::to_string(level));
+
+   // 1.2 Recursive Descent
+   if(level < maxLevel){
+      test(maxLevel, level + 1);
+   }
+
+   // 1.3 Coarse to Fine Communication, receiving end
+   if(level != 0){
+      WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate coarse to fine on level " + std::to_string(level));
+   }
+
+   // 1.4 Equal-Level Communication
+   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level));
+
+
+   // 1.5 Boundary Handling and Coalescence Preparation
+   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level));
+
+   // 1.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel){
+      WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
+   }
+
+   // Stop here if on coarsest level.
+   // Otherwise, continue to second subcycle.
+   if(level == 0) return;
+
+   // 2.1 Collision and Ghost-Layer Propagation
+   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide with ghost layer propagation on level " + std::to_string(level));
+
+   // 2.2 Recursive Descent
+   if(level < maxLevel)
+      test(maxLevel, level + 1);
+
+
+   // 2.4 Equal-Level Communication
+   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level));
+
+   // 2.5 Boundary Handling and Coalescence Preparation
+   WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level));
+
+   // 2.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel)
+      WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
+
+}
+
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+std::function<void()> BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation)
+{
+   return [level, withGhostLayerPropagation, this]()
+   {
+      if (withGhostLayerPropagation)
+      {
+         for(auto b: blocks_[level]){
+            ghostLayerPropagation(b, nullptr);
+            sweepCollection_.streamCollide(b, 0, nullptr);
+         }
+      }
+      else
+      {
+         for(auto b: blocks_[level]){
+            sweepCollection_.streamCollide(b, 0, nullptr);
+         }
+      }
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+   };
+}
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+std::function<void()> BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeBoundaryHandlingOnLevel(uint_t level)
+{
+   return [this, level]() {
+      for (auto b : blocks_[level])
+      {
+         boundaryCollection_(b, nullptr);
+         if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b, nullptr);
+      }
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+   };
+}
+
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::ghostLayerPropagation(
+   Block * block, gpuStream_t gpuStream)
+{
+   auto pdfField = block->getData<PdfField_T>(pdfFieldId_);
+
+   for(auto it = CommunicationStencil::beginNoCenter(); it != CommunicationStencil::end(); ++it){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*it);
+      // Propagate on ghost layers shadowing coarse or no blocks
+      if(!block->neighborhoodSectionHasSmallerBlocks(nSecIdx)){
+         CellInterval ci;
+         pdfField->getGhostRegion(*it, ci, 1);
+         sweepCollection_.streamOnlyNoAdvancementCellInterval(block, ci, gpuStream);
+      }
+   }
+}
+
+} // namespace lbm_generated
+} // namespace walberla
diff --git a/src/lbm_generated/gpu/CMakeLists.txt b/src/lbm_generated/gpu/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f81e5f2b370d478473f4d02d3853c469c905799f
--- /dev/null
+++ b/src/lbm_generated/gpu/CMakeLists.txt
@@ -0,0 +1,12 @@
+target_sources( lbm_generated
+        PRIVATE
+        AddToStorage.h
+        BasicRecursiveTimeStepGPU.h
+        BasicRecursiveTimeStepGPU.impl.h
+        GPUPdfField.h
+        NonuniformGPUCommData.h
+        NonuniformGPUCommData.impl.h
+        NonuniformGeneratedGPUPdfPackInfo.h
+        NonuniformGeneratedGPUPdfPackInfo.impl.h
+        UniformGeneratedGPUPdfPackInfo.h
+        )
\ No newline at end of file
diff --git a/src/lbm_generated/gpu/GPUPdfField.h b/src/lbm_generated/gpu/GPUPdfField.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a9f59a116b8c4e7c5fcb4ebd817dcb5cad0a908
--- /dev/null
+++ b/src/lbm_generated/gpu/GPUPdfField.h
@@ -0,0 +1,66 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GPUPdfField.h
+//! \ingroup lbm_generated
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "gpu/GPUField.h"
+
+using namespace walberla::gpu;
+
+namespace walberla::lbm_generated {
+
+template< typename LatticeStorageSpecification_T >
+class GPUPdfField : public GPUField< real_t >
+{
+ public:
+
+   //** Type Definitions  **********************************************************************************************
+   /*! \name Type Definitions */
+   //@{
+   using LatticeStorageSpecification = LatticeStorageSpecification_T;
+   using Stencil = typename LatticeStorageSpecification_T::Stencil;
+
+   using value_type = typename GPUField<real_t>::value_type;
+   //@}
+   //*******************************************************************************************************************
+
+   GPUPdfField( uint_t _xSize, uint_t _ySize, uint_t _zSize,
+               const LatticeStorageSpecification_T & storageSpecification,
+               uint_t _nrOfGhostLayers, const Layout & _layout = zyxf, bool usePitchedMem = true );
+
+
+   ~GPUPdfField() = default;
+
+ protected:
+   LatticeStorageSpecification_T storageSpecification_;
+};
+
+
+
+template< typename LatticeStorageSpecification_T >
+GPUPdfField< LatticeStorageSpecification_T >::GPUPdfField( uint_t _xSize, uint_t _ySize, uint_t _zSize,
+                                                          const LatticeStorageSpecification_T & storageSpecification,
+                                                          uint_t ghostLayers, const Layout & layout, bool usePitchedMem) :
+                    GPUField< real_t>( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification )
+{
+}
+
+} // namespace walberla::lbm_generated
\ No newline at end of file
diff --git a/src/lbm_generated/gpu/NonuniformGPUCommData.h b/src/lbm_generated/gpu/NonuniformGPUCommData.h
new file mode 100644
index 0000000000000000000000000000000000000000..795a9bcb5868c156f8c42dd94057f36361ca1e3d
--- /dev/null
+++ b/src/lbm_generated/gpu/NonuniformGPUCommData.h
@@ -0,0 +1,137 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformGPUCommData.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/StructuredBlockForest.h"
+#include "blockforest/BlockDataHandling.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/GPUField.h"
+#include "gpu/FieldCopy.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagField.h"
+
+#include "lbm_generated/communication/NonuniformCommData.h"
+
+#include "stencil/Directions.h"
+
+#define USE_CELL_INTERVALS
+
+namespace walberla::lbm_generated {
+
+using PartialCoalescenceMaskFieldGPU = gpu::GPUField< uint32_t >;
+
+/// Per-block data for non-uniform (grid-refined) GPU communication: holds the host-side
+/// partial coalescence mask field, its GPU mirror, and the intervals/flags needed to
+/// compute the coalescence bit mask.
+template< typename LatticeStorageSpecification_T >
+class NonuniformGPUCommData
+{
+ private:
+   void registerFlags();
+   void computeBitMask();
+   void syncDataGPU();
+
+ public:
+   using Stencil              = typename LatticeStorageSpecification_T::Stencil;
+   using CommunicationStencil = typename LatticeStorageSpecification_T::CommunicationStencil;
+
+#if defined(USE_CELL_INTERVALS)
+   /// Sets up the CPU mask field (two ghost layers), its GPU mirror and the interior
+   /// interval, then computes the partial coalescence bit mask and uploads it to the GPU.
+   NonuniformGPUCommData(IBlock* const block, uint_t xSize, uint_t ySize, uint_t zSize)
+      : block_(block), maskField_(xSize, ySize, zSize, 2),
+        maskFieldGPU_(xSize, ySize, zSize, 1, 2, field::fzyx),
+        interiorInterval(0, 0, 0, cell_idx_c(xSize) - 1, cell_idx_c(ySize) - 1, cell_idx_c(zSize) - 1)
+   {
+      registerFlags();
+      computeBitMask();
+      syncDataGPU();
+   };
+#else
+   // Fixed: the constructor was misnamed NonuniformCommData (not a valid constructor of this
+   // class), and pdfFieldID_ was initialized without ever being declared as a member.
+   NonuniformGPUCommData(IBlock* const block, const BlockDataID pdfFieldID, uint_t xSize, uint_t ySize, uint_t zSize)
+      : block_(block), pdfFieldID_(pdfFieldID), maskField_(xSize, ySize, zSize, 2)
+   {
+      registerFlags();
+      computeBitMask();
+      syncDataGPU();
+   };
+#endif
+
+   /// Identity comparison: two comm data objects are equal iff they are the same object.
+   bool operator==(const NonuniformGPUCommData& other) const { return this == &other; }
+   bool operator!=(const NonuniformGPUCommData& other) const { return this != &other; }
+
+   PartialCoalescenceMaskField& getMaskField() { return maskField_; }
+   const PartialCoalescenceMaskField& getMaskField() const { return maskField_; }
+
+   PartialCoalescenceMaskFieldGPU& getMaskFieldGPU() { return maskFieldGPU_; }
+   const PartialCoalescenceMaskFieldGPU& getMaskFieldGPU() const { return maskFieldGPU_; }
+
+ private:
+#if defined(USE_CELL_INTERVALS)
+   void prepareIntervals();
+   void setFlagOnInterval(const CellInterval & ci, const uint_t fIdx);
+#else
+   void prepareFlags();
+   void resetCornerSkippingOriginFlags();
+#endif
+
+   void setupCornerSkippingOrigins(stencil::Direction commDir);
+   void setupBitMaskSlice(stencil::Direction commDir, stencil::Direction streamDir);
+
+   bool haveSmallestIdInIntersection(Vector3<cell_idx_t> cornerDir);
+
+   const IBlock* const block_;
+#if !defined(USE_CELL_INTERVALS)
+   const BlockDataID pdfFieldID_;  //< declared here so the #else ctor's init list is valid
+#endif
+   PartialCoalescenceMaskField maskField_;        //< host-side mask, computed on the CPU
+   PartialCoalescenceMaskFieldGPU maskFieldGPU_;  //< device mirror, filled by syncDataGPU()
+
+#if defined(USE_CELL_INTERVALS)
+   const CellInterval interiorInterval;  //< block interior in local cell coordinates
+   std::vector< CellInterval > passThroughIntervals_;
+   std::vector< CellInterval > cornerSkippingOriginIntervals_;
+#endif
+};
+
+
+/// Block data handling that creates one NonuniformGPUCommData instance per block,
+/// sized according to the block's cell counts in the given block forest.
+template< typename LatticeStorageSpecification_T >
+class NonuniformGPUCommDataHandling
+   : public blockforest::AlwaysInitializeBlockDataHandling< NonuniformGPUCommData< LatticeStorageSpecification_T > >
+{
+ public:
+   using CommmData_T = NonuniformGPUCommData< LatticeStorageSpecification_T >;
+
+   NonuniformGPUCommDataHandling(const weak_ptr< StructuredBlockForest >& blocks)
+      : blocks_(blocks){};
+
+   /// Allocates the comm data for \p block; the forest must still be alive.
+   CommmData_T* initialize(IBlock* const block) override
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR(block)
+      auto blocks = blocks_.lock();
+      WALBERLA_CHECK_NOT_NULLPTR(blocks)
+
+      return new CommmData_T(block, blocks->getNumberOfXCells(*block), blocks->getNumberOfYCells(*block),
+                             blocks->getNumberOfZCells(*block));
+   }
+
+ private:
+   // Store the same type the constructor receives (was weak_ptr< StructuredBlockStorage >,
+   // which silently upcast the forest handle).
+   const weak_ptr< StructuredBlockForest > blocks_;
+};
+
+} // namespace walberla::lbm_generated
+
+#include "lbm_generated/gpu/NonuniformGPUCommData.impl.h"
diff --git a/src/lbm_generated/gpu/NonuniformGPUCommData.impl.h b/src/lbm_generated/gpu/NonuniformGPUCommData.impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..47d6f033046b46d9d6156b6c91c0ffff6e82cf91
--- /dev/null
+++ b/src/lbm_generated/gpu/NonuniformGPUCommData.impl.h
@@ -0,0 +1,322 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformGPUCommData.impl.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/all.h"
+
+#include "lbm_generated/gpu/NonuniformGPUCommData.h"
+
+#include "stencil/Directions.h"
+
+// Flag bit for stream direction index d (requires d < 32). The argument is parenthesised so
+// expression arguments bind correctly, and the literal is unsigned: with the previous form,
+// the bit-31 flag below evaluated 1 << 31, which overflows a signed int (undefined behavior).
+#define IDX_FLAG(d) (1u << (d))
+
+#if !defined(USE_CELL_INTERVALS)
+#define INTERIOR_FLAG_BIT 29
+#define INTERIOR_FLAG (1u << INTERIOR_FLAG_BIT)
+
+#define PASS_THROUGH_FLAG_BIT 30
+#define PASS_THROUGH_FLAG (1u << PASS_THROUGH_FLAG_BIT)
+
+#define CORNER_SKIPPING_ORIGIN_FLAG_BIT 31
+#define CORNER_SKIPPING_ORIGIN_FLAG (1u << CORNER_SKIPPING_ORIGIN_FLAG_BIT)
+#endif
+
+using namespace walberla::lbm_generated::util;
+
+namespace walberla::lbm_generated {
+
+/***********************************************************************************************************************
+ *                                               Bit Mask Computation                                                  *
+ **********************************************************************************************************************/
+
+/// Registers one flag per non-center stencil direction at the host mask field
+/// (and, when cell intervals are disabled, the three bookkeeping flag bits).
+template< typename LatticeStorageSpecification_T >
+void NonuniformGPUCommData< LatticeStorageSpecification_T >::registerFlags()
+{
+#if !defined(USE_CELL_INTERVALS)
+   maskField_.registerFlag(FlagUID(true), INTERIOR_FLAG_BIT);
+   maskField_.registerFlag(FlagUID(true), PASS_THROUGH_FLAG_BIT);
+   maskField_.registerFlag(FlagUID(true), CORNER_SKIPPING_ORIGIN_FLAG_BIT);
+#endif
+
+   for (auto dirIt = Stencil::beginNoCenter(); dirIt != Stencil::end(); ++dirIt)
+   {
+      maskField_.registerFlag(FlagUID(true), Stencil::idx[*dirIt]);
+   }
+}
+
+#if defined(USE_CELL_INTERVALS)
+
+/// Recomputes the pass-through intervals: for every communication direction whose
+/// neighborhood section does not hold an equally sized block, the two-cell-thick
+/// ghost region in that direction is recorded.
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformGPUCommData< LatticeStorageSpecification_T >::prepareIntervals()
+{
+   passThroughIntervals_.clear();
+   const auto * blockPtr = dynamic_cast< const Block * >(block_);
+
+   for (auto dirIt = CommunicationStencil::beginNoCenter(); dirIt != CommunicationStencil::end(); ++dirIt)
+   {
+      const uint_t sectionIdx = blockforest::getBlockNeighborhoodSectionIndex(*dirIt);
+      if (blockPtr->neighborhoodSectionHasEquallySizedBlock(sectionIdx))
+         continue;
+
+      CellInterval ghostRegion;
+      maskField_.getGhostRegion(*dirIt, ghostRegion, 2);
+      passThroughIntervals_.push_back(ghostRegion);
+   }
+}
+
+/// Sets the flag bit belonging to stream direction index \p fIdx on every cell of \p ci.
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformGPUCommData< LatticeStorageSpecification_T >::setFlagOnInterval(const CellInterval & ci,
+                                                                                   const uint_t fIdx)
+{
+   const auto directionFlag = IDX_FLAG(fIdx);
+   for (const Cell & cell : ci)
+   {
+      maskField_.addFlag(cell, directionFlag);
+   }
+}
+
+#else
+
+/**
+ * Prepares the INTERIOR and PASS_THROUGH flags.
+ * Sets the domain interior to INTERIOR. Sets any ghost layers corresponding to a coarse block
+ * or no block to PASS_THROUGH.
+ */
+// Fixed: the definition was qualified with the wrong class name (NonuniformCommData instead
+// of NonuniformGPUCommData), so it did not define this class's member function.
+template< typename LatticeStorageSpecification_T >
+void NonuniformGPUCommData< LatticeStorageSpecification_T >::prepareFlags()
+{
+   const Block * b = dynamic_cast< const Block * >(block_);
+
+   // Set interior to origin
+   for (auto it = maskField_.beginXYZ(); it != maskField_.end(); ++it)
+   {
+      maskField_.addFlag(it.cell(), INTERIOR_FLAG);
+   }
+
+   // Set GLs to pass-through
+   for(auto commDir = CommunicationStencil::beginNoCenter(); commDir != CommunicationStencil::end(); ++commDir){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*commDir);
+      if(!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx)){
+         for(auto it = maskField_.beginGhostLayerOnlyXYZ(2, *commDir); it != maskField_.end(); ++it){
+            maskField_.addFlag(it.cell(), PASS_THROUGH_FLAG);
+         }
+      }
+   }
+}
+
+/**
+ * Resets the CORNER_SKIPPING_ORIGIN flag on any ghost layers adjacent to a
+ * coarse block or to no block.
+ */
+// Fixed: the definition was qualified with the wrong class name (NonuniformCommData instead
+// of NonuniformGPUCommData), so it did not define this class's member function.
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformGPUCommData< LatticeStorageSpecification_T >::resetCornerSkippingOriginFlags()
+{
+   const Block * b = dynamic_cast< const Block * >(block_);
+
+   // Remove origin flag from any ghost layers
+   for(auto commDir = CommunicationStencil::beginNoCenter(); commDir != CommunicationStencil::end(); ++commDir){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*commDir);
+      if(!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx)){
+         for(auto it = maskField_.beginGhostLayerOnlyXYZ(2, *commDir); it != maskField_.end(); ++it){
+            maskField_.removeFlag(it.cell(), CORNER_SKIPPING_ORIGIN_FLAG);
+         }
+      }
+   }
+}
+
+#endif
+
+
+/**
+ * Determines whether the current block has the smallest BlockID among all fine blocks of a
+ * given intersection volume.
+ * @tparam LatticeStorageSpecification_T storage specification of the lattice model
+ * @param cornerDir corner direction vector identifying the intersection volume
+ * @return true iff no equally sized neighbor in any subdirection of cornerDir has a smaller ID
+ */
+template< typename LatticeStorageSpecification_T >
+inline bool NonuniformGPUCommData< LatticeStorageSpecification_T >::haveSmallestIdInIntersection(Vector3<cell_idx_t> cornerDir)
+{
+   const IBlockID& myId = block_->getId();
+   const Block* b = dynamic_cast< const Block* >(block_);
+   // The callback cancels (returns false) as soon as an equally sized neighbor with a
+   // smaller ID is found in any subdirection of cornerDir.
+   return forEachSubdirectionCancel(cornerDir, [&](Vector3< cell_idx_t > dirVec) {
+     const uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(dirVec[0], dirVec[1], dirVec[2]);
+     if (b->neighborhoodSectionHasEquallySizedBlock(nSecIdx))
+     {
+        if (b->getNeighbor(nSecIdx, 0).getId() < myId) return false;
+     }
+     return true;
+   });
+}
+
+
+/**
+ * Sets up the feasible space for the given communication direction.
+ * Additionally to the field interior, marks every ghost layer slice corresponding to an adjacent coarse block,
+ * and the corresponding corner as feasible, if that corner also belongs to a coarse block and the current block
+ * has the smallest BlockID participating in the intersection.
+ * @param commDir A communication direction pointing toward an adjacent coarse block
+ */
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformGPUCommData< LatticeStorageSpecification_T >::setupCornerSkippingOrigins(stencil::Direction commDir)
+{
+#if defined(USE_CELL_INTERVALS)
+   cornerSkippingOriginIntervals_.clear();
+#else
+   resetCornerSkippingOriginFlags();
+#endif
+
+   const Block* b = dynamic_cast< const Block* >(block_);
+   Vector3<cell_idx_t> commDirVec(stencil::cx[commDir], stencil::cy[commDir], stencil::cz[commDir]);
+
+   // Iterate all comm directions orthogonal to commDir
+   forEachOrthogonalDirection< CommunicationStencil >(commDirVec, [&](Vector3< cell_idx_t > toSourceVec) {
+      const uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(toSourceVec[0], toSourceVec[1], toSourceVec[2]);
+      // Find if there is a coarse block or no block at all in this neighborhood
+      // There are three possibilities: Coarse block, Same-level block or no block
+      // Finer block is not possible because of 2:1 balance
+      if (!b->neighborhoodSectionHasEquallySizedBlock(nSecIdx))
+      {
+         // From this adjacent coarse block (or not-block, for boundary handling), corner skipping must be handled.
+         // Also, if there is no block, boundary handling in that region must be done on only
+         // one of the participating fine blocks.
+         Vector3< cell_idx_t > cornerDirVec = toSourceVec + commDirVec;
+
+         // If the current block has the smallest participating ID...
+         if (haveSmallestIdInIntersection(cornerDirVec))
+         {
+            const stencil::Direction toSourceDir = stencil::vectorToDirection(toSourceVec);
+
+            // ... Mark source GL region as corner skipping origin.
+#if defined(USE_CELL_INTERVALS)
+            CellInterval ci;
+            maskField_.getGhostRegion(toSourceDir, ci, 2);
+            cornerSkippingOriginIntervals_.push_back(ci);
+#else
+            // NOTE(review): other ghost-layer traversals in this file pass an explicit
+            // thickness of 2 to beginGhostLayerOnlyXYZ — confirm the default used here
+            // is intentional.
+            for (auto it = maskField_.beginGhostLayerOnlyXYZ(toSourceDir); it != maskField_.end(); ++it)
+            {
+               maskField_.addFlag(it.cell(), CORNER_SKIPPING_ORIGIN_FLAG);
+            }
+#endif
+         }
+      }
+   });
+}
+
+
+/// Marks, in the two-cell-thick ghost slice toward \p commDir, every cell whose upstream
+/// source (one step, or two steps through a pass-through region, against \p streamDir)
+/// lies in the interior or in a corner-skipping origin region, by setting the flag bit of
+/// \p streamDir. The shift arithmetic is order-sensitive; the code is left unchanged.
+template< typename LatticeStorageSpecification_T >
+inline void NonuniformGPUCommData< LatticeStorageSpecification_T >::setupBitMaskSlice(stencil::Direction commDir, stencil::Direction streamDir)
+{
+   uint_t fIdx = Stencil::idx[streamDir];
+   Cell streamVec(stencil::cx[streamDir], stencil::cy[streamDir], stencil::cz[streamDir]);
+
+#if defined(USE_CELL_INTERVALS)
+   CellInterval commSliceInterval;
+   maskField_.getGhostRegion(commDir, commSliceInterval, 2);
+
+   // Shift back once
+   commSliceInterval.shift(-streamVec);
+
+   // Intersect with interior and set flag on intersection volume
+   CellInterval interiorIntersection(interiorInterval);
+   interiorIntersection.intersect(commSliceInterval);
+   if(!interiorIntersection.empty()){
+      interiorIntersection.shift(streamVec);
+      setFlagOnInterval(interiorIntersection, fIdx);
+   }
+
+   // Intersect with pass-through regions... (copies of the stored intervals are mutated)
+   for(auto passThroughIntersection : std::as_const(passThroughIntervals_)){
+      passThroughIntersection.intersect(commSliceInterval);
+      if(passThroughIntersection.empty()) continue;
+
+      // ... shift back once more ...
+      passThroughIntersection.shift(-streamVec);
+
+      // ... intersect with interior ...
+      interiorIntersection = interiorInterval;
+      interiorIntersection.intersect(passThroughIntersection);
+      if(!interiorIntersection.empty()){
+         // Shift forward by two steps: back to the ghost slice the flag belongs to
+         interiorIntersection.shift(2*streamVec.x(), 2* streamVec.y(), 2*streamVec.z());
+         setFlagOnInterval(interiorIntersection, fIdx);
+      }
+
+      // ... and with corner-skipping origin regions
+      for(auto originIntersection : std::as_const(cornerSkippingOriginIntervals_)){
+         originIntersection.intersect(passThroughIntersection);
+         if(!originIntersection.empty()){
+            originIntersection.shift(2*streamVec.x(), 2* streamVec.y(), 2*streamVec.z());
+            setFlagOnInterval(originIntersection, fIdx);
+         }
+      }
+   }
+#else
+   for(auto it = maskField_.beginGhostLayerOnlyXYZ(2, commDir); it != maskField_.end(); ++it){
+      Cell currentCell = it.cell();
+
+      // Shift back once
+      Cell shiftedCell = currentCell - streamVec;
+
+      if (maskField_.isFlagSet(shiftedCell, INTERIOR_FLAG)){
+         maskField_.addFlag(currentCell, IDX_FLAG(fIdx));
+      }
+      else if (maskField_.isFlagSet(shiftedCell, PASS_THROUGH_FLAG)){
+         // Shift back twice
+         shiftedCell -= streamVec;
+         if (maskField_.isPartOfMaskSet(shiftedCell, INTERIOR_FLAG | CORNER_SKIPPING_ORIGIN_FLAG)){
+            maskField_.addFlag(currentCell, IDX_FLAG(fIdx));
+         }
+
+      }
+      // else continue;
+   }
+#endif
+}
+
+/**
+ * Computes the partial coalescence bit mask on the mask field.
+ * Assumes that all flags are already registered at the field, and that the field
+ * has been initialized to zero.
+ */
+template< typename LatticeStorageSpecification_T >
+void NonuniformGPUCommData< LatticeStorageSpecification_T >::computeBitMask()
+{
+#if defined(USE_CELL_INTERVALS)
+   prepareIntervals();
+#else
+   prepareFlags();
+#endif
+
+   const Block* b = dynamic_cast< const Block* >(block_);
+   // Only communication directions pointing toward a coarser neighbor need a bit mask.
+   for(auto commIt = CommunicationStencil::beginNoCenter(); commIt != CommunicationStencil::end(); ++commIt){
+      stencil::Direction commDir = *commIt;
+      const uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(commDir);
+      if(b->neighborhoodSectionHasLargerBlock(nSecIdx)){
+         setupCornerSkippingOrigins(commDir);
+
+         // Process every stream direction that has a component along commDir.
+         for(uint_t streamDirIdx = 0; streamDirIdx < Stencil::d_per_d_length[commDir]; streamDirIdx++){
+            stencil::Direction streamDir = Stencil::d_per_d[commDir][streamDirIdx];
+            setupBitMaskSlice(commDir, streamDir);
+         }
+      }
+   }
+}
+
+/// Uploads the host-side mask field to its GPU mirror via gpu::fieldCpy.
+template< typename LatticeStorageSpecification_T >
+void NonuniformGPUCommData< LatticeStorageSpecification_T >::syncDataGPU()
+{
+   gpu::fieldCpy(maskFieldGPU_, maskField_);
+}
+} // namespace walberla::lbm_generated
diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..d6ac87010a6889b899380514ec51d717159bd6f8
--- /dev/null
+++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
@@ -0,0 +1,332 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformGeneratedGPUPdfPackInfo.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedNonUniformGPUPackInfo.h"
+
+#include "lbm_generated/gpu/NonuniformGPUCommData.h"
+#include "lbm_generated/field/PdfField.h"
+
+namespace walberla::lbm_generated
+{
+using stencil::Direction;
+
+namespace internal
+{
+/*
+ * Base Template for Packing Kernels Wrapper. This wrapper is required for passing the time step to
+ * kernels generated for in-place streaming patterns. The generated code should not be templated.
+ *
+ * NOTE(review): the `= 0` pure-specifiers below are on non-virtual functions; this is only
+ * well-formed as long as the primary template is never instantiated — presumably only the two
+ * specializations below are ever used; confirm. The declarations serve as interface
+ * documentation for those specializations.
+ * NOTE(review): zeroCoalescenceRegion here lacks the gpuStream_t parameter the specializations
+ * take, and localCopyDirection's stream has no default — keep the signatures in sync.
+ */
+template< typename PdfField_T, bool inplace >
+class NonuniformGPUPackingKernelsWrapper
+{
+ public:
+   void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const  = 0;
+   void unpackAll(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, gpuStream_t stream = nullptr) const = 0;
+   void localCopyAll(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                     CellInterval dstInterval, gpuStream_t stream = nullptr) const                                    = 0;
+
+   void packDirection(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const  = 0;
+   void unpackDirection(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream = nullptr) const = 0;
+   void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                           CellInterval dstInterval, Direction dir, gpuStream_t stream) const               = 0;
+
+   void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
+                           stencil::Direction dir, gpuStream_t stream = nullptr) const = 0;
+
+   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval ci,
+                               unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const                                   = 0;
+   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval ci, Direction dir) const                      = 0;
+   void unpackCoalescence(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream = nullptr) const = 0;
+
+   uint_t size(CellInterval ci, Direction dir) const                   = 0;
+   uint_t size(CellInterval ci) const                                  = 0;
+   uint_t redistributeSize(CellInterval ci) const                      = 0;
+   uint_t partialCoalescenceSize(CellInterval ci, Direction dir) const = 0;
+};
+
+/*
+ * Template Specialization for two-fields patterns, with trivial method wrappers.
+ */
+/// Two-field (pull/push) specialization: every call forwards unchanged to the generated
+/// PackKernels instance — no timestep bookkeeping is required for this streaming pattern.
+template< typename PdfField_T >
+class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+
+   void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const
+   {
+      kernels_.packAll(srcField, ci, outBuffer, stream);
+   }
+
+   void unpackAll(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, gpuStream_t stream = nullptr) const
+   {
+      kernels_.unpackAll(dstField, ci, inBuffer, stream);
+   }
+
+   void localCopyAll(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                     CellInterval dstInterval, gpuStream_t stream = nullptr) const
+   {
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval, stream);
+   }
+
+   void packDirection(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      kernels_.packDirection(srcField, ci, outBuffer, dir, stream);
+   }
+
+   void unpackDirection(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      kernels_.unpackDirection(dstField, ci, inBuffer, dir, stream);
+   }
+
+   void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                           CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+   {
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, stream);
+   }
+
+   void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
+                           stencil::Direction dir, gpuStream_t stream = nullptr) const
+   {
+      kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, stream);
+   }
+
+   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval ci,
+                               unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, stream);
+   }
+
+   void unpackCoalescence(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      kernels_.unpackCoalescence(dstField, ci, inBuffer, dir, stream);
+   }
+
+   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval ci, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      kernels_.zeroCoalescenceRegion(dstField, ci, dir, stream);
+   }
+
+   // Buffer size queries, forwarded to the generated kernels.
+   uint_t size(CellInterval ci, Direction dir) const { return kernels_.size(ci, dir); }
+   uint_t size(CellInterval ci) const { return kernels_.size(ci); }
+   uint_t redistributeSize(CellInterval ci) const { return kernels_.redistributeSize(ci); }
+   uint_t partialCoalescenceSize(CellInterval ci, Direction dir) const
+   {
+      return kernels_.partialCoalescenceSize(ci, dir);
+   }
+
+ private:
+   PackingKernels_T kernels_;
+};
+
+/*
+ * Template Specialization for in-place patterns, extracting the timestep from the lattice model.
+ * Each wrapper reads the current timestep from the source (or destination) field and passes it
+ * to the generated kernel; local copies assert that both fields are on the same timestep.
+ */
+template< typename PdfField_T >
+class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+
+   void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packAll(srcField, ci, outBuffer, timestep, stream);
+   }
+
+   void unpackAll(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackAll(dstField, ci, inBuffer, timestep, stream);
+   }
+
+   void localCopyAll(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                     CellInterval dstInterval, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(timestep, dstField->getTimestep())
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval, timestep, stream);
+   }
+
+   void packDirection(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packDirection(srcField, ci, outBuffer, dir, timestep, stream);
+   }
+
+   void unpackDirection(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackDirection(dstField, ci, inBuffer, dir, timestep, stream);
+   }
+
+   void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+                           CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(timestep, dstField->getTimestep())
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream);
+   }
+
+   void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
+                           stencil::Direction dir, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep, stream);
+   }
+
+   void packPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval ci,
+                               unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep, stream);
+   }
+
+   void zeroCoalescenceRegion(PdfField_T* dstField, CellInterval ci, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.zeroCoalescenceRegion(dstField, ci, dir, timestep, stream);
+   }
+
+   void unpackCoalescence(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream = nullptr) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackCoalescence(dstField, ci, inBuffer, dir, timestep, stream);
+   }
+
+   // Buffer size queries, forwarded to the generated kernels.
+   uint_t size(CellInterval ci, Direction dir) const { return kernels_.size(ci, dir); }
+   uint_t size(CellInterval ci) const { return kernels_.size(ci); }
+   uint_t redistributeSize(CellInterval ci) const { return kernels_.redistributeSize(ci); }
+   uint_t partialCoalescenceSize(CellInterval ci, Direction dir) const
+   {
+      return kernels_.partialCoalescenceSize(ci, dir);
+   }
+
+ private:
+   PackingKernels_T kernels_;
+};
+} // namespace internal
+
+/***********************************************************************************************************************
+ *                                                  Class Declaration                                                  *
+ **********************************************************************************************************************/
+
+template< typename PdfField_T >
+class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUniformGPUPackInfo
+{
+ public:
+   using VoidFunction                  = std::function< void(gpuStream_t) >;
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+   using Stencil                       = typename LatticeStorageSpecification_T::Stencil;
+   using CommunicationStencil          = typename LatticeStorageSpecification_T::CommunicationStencil;
+   using CommData_T                    = NonuniformGPUCommData< LatticeStorageSpecification_T >;
+
+   NonuniformGeneratedGPUPdfPackInfo(const BlockDataID pdfFieldID, const BlockDataID commDataID)
+      : pdfFieldID_(pdfFieldID), commDataID_(commDataID){};
+
+   bool constantDataExchange() const override { return true; };
+   bool threadsafeReceiving() const override { return false; };
+
+   /// Equal Level
+   void unpackDataEqualLevel(Block* receiver, Direction dir, GpuBuffer_T& buffer) override;
+   void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir,
+                                   gpuStream_t stream) override;
+   void getLocalEqualLevelCommFunction(std::vector< VoidFunction >& commFunctions, const Block* sender, Block* receiver,
+                                       stencil::Direction dir) override;
+
+   /// Coarse to Fine
+   void unpackDataCoarseToFine(Block* fineReceiver, const BlockID& coarseSender, stencil::Direction dir,
+                               GpuBuffer_T& buffer) override;
+   void communicateLocalCoarseToFine(const Block* coarseSender, Block* fineReceiver, stencil::Direction dir) override;
+   void communicateLocalCoarseToFine(const Block* coarseSender, Block* fineReceiver, stencil::Direction dir,
+                                     GpuBuffer_T& buffer, gpuStream_t stream) override;
+   void getLocalCoarseToFineCommFunction(std::vector< VoidFunction >& commFunctions, const Block* coarseSender,
+                                         Block* fineReceiver, stencil::Direction dir, GpuBuffer_T& buffer) override;
+
+   /// Fine to Coarse
+   void prepareCoalescence(Block* coarseReceiver, gpuStream_t gpuStream = nullptr);
+   void unpackDataFineToCoarse(Block* coarseReceiver, const BlockID& fineSender, stencil::Direction dir,
+                               GpuBuffer_T& buffer) override;
+
+   void communicateLocalFineToCoarse(const Block* fineSender, Block* coarseReceiver, stencil::Direction dir) override;
+   void communicateLocalFineToCoarse(const Block* fineSender, Block* coarseReceiver, stencil::Direction dir,
+                                     GpuBuffer_T& buffer, gpuStream_t stream) override;
+   void getLocalFineToCoarseCommFunction(std::vector< VoidFunction >& commFunctions, const Block* fineSender,
+                                         Block* coarseReceiver, stencil::Direction dir, GpuBuffer_T& buffer) override;
+
+   uint_t sizeEqualLevelSend(const Block* sender, stencil::Direction dir) override;
+   uint_t sizeCoarseToFineSend(const Block* coarseSender, const BlockID& fineReceiver, stencil::Direction dir) override;
+   uint_t sizeFineToCoarseSend(const Block* fineSender, stencil::Direction dir) override;
+
+ protected:
+   void packDataEqualLevelImpl(const Block* sender, stencil::Direction dir, GpuBuffer_T& buffer) const override;
+
+   void packDataCoarseToFineImpl(const Block* coarseSender, const BlockID& fineReceiver, stencil::Direction dir,
+                                 GpuBuffer_T& buffer) const override;
+   void packDataFineToCoarseImpl(const Block* fineSender, const BlockID& coarseReceiver, stencil::Direction dir,
+                                 GpuBuffer_T& buffer) const override;
+
+ private:
+   /// Helper Functions
+   /// As in PdfFieldPackInfo.h
+   Vector3< cell_idx_t > getNeighborShift(const BlockID& fineBlock, stencil::Direction dir) const;
+   bool areNeighborsInDirection(const Block* block, const BlockID& neighborID,
+                                const Vector3< cell_idx_t > dirVec) const;
+
+   CellInterval intervalHullInDirection(const CellInterval& ci, const Vector3< cell_idx_t > tangentialDir,
+                                        cell_idx_t width) const;
+   bool skipsThroughCoarseBlock(const Block* block, const Direction dir) const;
+
+   void getCoarseBlockCommIntervals(const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+                                    std::vector< std::pair< Direction, CellInterval > >& intervals) const;
+   void getFineBlockCommIntervals(const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+                                  std::vector< std::pair< Direction, CellInterval > >& intervals) const;
+
+   CellInterval getCoarseBlockCoalescenceInterval(const Block* coarseBlock, const BlockID& fineBlockID, Direction dir,
+                                                  const PdfField_T* field) const;
+
+   const BlockDataID pdfFieldID_;
+   internal::NonuniformGPUPackingKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_;
+
+ public:
+   const BlockDataID commDataID_;
+};
+
+/***********************************************************************************************************************
+ *                                                  Factory Functions                                                  *
+ **********************************************************************************************************************/
+
+/// Creates a NonuniformGeneratedGPUPdfPackInfo and registers the communication
+/// data (block data item identified by dataIdentifier) it requires on the given
+/// block forest. Implemented in NonuniformGeneratedGPUPdfPackInfo.impl.h.
+template< typename PdfField_T >
+std::shared_ptr< NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >
+   setupNonuniformGPUPdfCommunication(const std::weak_ptr< StructuredBlockForest >& blocks,
+                                      const BlockDataID pdfFieldID,
+                                      const std::string& dataIdentifier = "NonuniformGPUCommData");
+
+} // namespace walberla::lbm_generated
+
+#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h"
diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..adfbb419a8d3a3c82217fecf974977b28bb2a19b
--- /dev/null
+++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
@@ -0,0 +1,713 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file NonuniformGeneratedGPUPdfPackInfo.impl.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "NonuniformGeneratedGPUPdfPackInfo.h"
+
+using namespace walberla::lbm_generated::util;
+
+namespace walberla::lbm_generated {
+
+/***********************************************************************************************************************
+ *                                                  Factory Functions                                                  *
+ **********************************************************************************************************************/
+
+
+/**
+ * Sets up a NonuniformGeneratedGPUPdfPackInfo.
+ *
+ * Registers a NonuniformGPUCommDataHandling block data item on the block forest and
+ * returns a pack info bound to both the PDF field and that communication data.
+ *
+ * @tparam PdfField_T      GPU PDF field type the pack info operates on
+ * @param blocks           Weak pointer to the structured block forest (must still be alive)
+ * @param pdfFieldID       Block data ID of the PDF field to communicate
+ * @param dataIdentifier   Identifier under which the communication data is registered
+ * @return Shared pointer to the newly constructed pack info
+ */
+template< typename PdfField_T>
+std::shared_ptr< NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >
+   setupNonuniformGPUPdfCommunication( const std::weak_ptr< StructuredBlockForest > & blocks,
+                                 const BlockDataID pdfFieldID,
+                                 const std::string & dataIdentifier)
+{
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+
+   auto sbf = blocks.lock();
+   WALBERLA_CHECK_NOT_NULLPTR(sbf, "Trying to create Nonuniform GPU Packinfo for a block storage object that doesn't exist anymore" );
+
+   auto handling = std::make_shared<NonuniformGPUCommDataHandling< LatticeStorageSpecification_T > >(blocks);
+   BlockDataID commDataID = sbf->addBlockData(handling, dataIdentifier);
+
+   return std::make_shared<NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >(pdfFieldID, commDataID);
+}
+
+
+/***********************************************************************************************************************
+ *                                          Equal Level Communication                                                  *
+ **********************************************************************************************************************/
+
+/// Unpacks equal-level communication data from the GPU buffer into the receiver's
+/// ghost region in direction dir. Two ghost layers are used if dir skips through
+/// a coarser block, otherwise one.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::unpackDataEqualLevel(Block* receiver,
+                                                                           Direction dir,
+                                                                           GpuBuffer_T & buffer)
+{
+   auto field = receiver->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   cell_idx_t gls = skipsThroughCoarseBlock(receiver, dir) ? 2 : 1;
+   field->getGhostRegion(dir, ci, gls, false);
+   uint_t size              = kernels_.size(ci, dir);
+   // advanceNoResize assumes the buffer was sized beforehand (cf. sizeEqualLevelSend).
+   auto bufferPtr = buffer.advanceNoResize(size);
+   kernels_.unpackDirection(field, ci, bufferPtr, dir);
+}
+
+/// Copies the sender's slice before the ghost layer directly into the receiver's
+/// ghost region (inverse direction) on the given GPU stream — no staging buffer.
+/// Both blocks reside on the same process.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(
+   const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream)
+{
+   auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+   CellInterval srcRegion;
+   CellInterval dstRegion;
+   // Two ghost layers if the communication direction skips through a coarser block.
+   cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   srcField->getSliceBeforeGhostLayer(dir, srcRegion, gls, false);
+   dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, gls, false);
+   kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir, stream);
+}
+
+/// Builds a deferred, stream-aware functor performing the equal-level local copy
+/// (same operation as communicateLocalEqualLevel) and appends it to commFunctions.
+/// Fields, regions and direction are captured by value so the functor remains
+/// valid until the communication scheme invokes it.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getLocalEqualLevelCommFunction(
+   std::vector< VoidFunction >& commFunctions, const Block* sender, Block* receiver,
+   stencil::Direction dir)
+{
+   auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+   CellInterval srcRegion;
+   CellInterval dstRegion;
+   // Two ghost layers if the communication direction skips through a coarser block.
+   cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   srcField->getSliceBeforeGhostLayer(dir, srcRegion, gls, false);
+   dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, gls, false);
+
+   auto commFunction = [this, srcField, srcRegion, dstField, dstRegion, dir](gpuStream_t gpuStream)
+   {
+      kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir, gpuStream);
+   };
+   commFunctions.emplace_back(commFunction);
+}
+
+/// Packs the sender's slice before the ghost layer in direction dir into the GPU
+/// buffer. Mirror of unpackDataEqualLevel; ghost-layer count chosen identically.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::packDataEqualLevelImpl(
+   const Block* sender, stencil::Direction dir, GpuBuffer_T & buffer) const
+{
+   auto field = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   field->getSliceBeforeGhostLayer(dir, ci, gls, false);
+   uint_t size              = kernels_.size(ci, dir);
+   auto bufferPtr = buffer.advanceNoResize(size);
+   kernels_.packDirection(field, ci, bufferPtr, dir);
+}
+
+/***********************************************************************************************************************
+ *                                          Coarse to Fine Communication                                               *
+ **********************************************************************************************************************/
+
+/// Packs coarse-block data destined for the fine receiver: one buffer segment per
+/// (sub-direction, interval) pair computed by getCoarseBlockCommIntervals. All PDF
+/// values of each interval are packed (packAll), not just direction-specific ones.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::packDataCoarseToFineImpl(
+   const Block* coarseSender, const BlockID& fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer) const
+{
+   auto field = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > intervals;
+   getCoarseBlockCommIntervals(fineReceiver, dir, field, intervals);
+
+   for (auto t : intervals)
+   {
+      CellInterval ci          = t.second;
+      uint_t size              = kernels_.size(ci);
+      auto bufferPtr = buffer.advanceNoResize(size);
+      kernels_.packAll(field, ci, bufferPtr);
+   }
+}
+
+/// Unpacks coarse-to-fine data on the fine receiver, redistributing coarse values
+/// onto the fine grid per interval. The coarseSender ID is unused because the
+/// intervals are derived solely from the receiver's own block ID.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::unpackDataCoarseToFine(
+   Block* fineReceiver, const BlockID& /*coarseSender*/, stencil::Direction dir, GpuBuffer_T & buffer)
+{
+   auto field = fineReceiver->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > intervals;
+   getFineBlockCommIntervals(fineReceiver->getId(), dir, field, intervals);
+
+   for (auto t : intervals)
+   {
+      Direction d              = t.first;
+      CellInterval ci          = t.second;
+      uint_t size              = kernels_.redistributeSize(ci);
+      auto bufferPtr = buffer.advanceNoResize(size);
+      kernels_.unpackRedistribute(field, ci, bufferPtr, d);
+   }
+}
+
+/// Local (same-process) coarse-to-fine communication without a caller-provided
+/// buffer: packs each coarse interval and immediately redistributes it into the
+/// matching fine interval. Intervals on both sides must pair up one-to-one.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
+   const Block* coarseSender, Block* fineReceiver, stencil::Direction dir)
+{
+   auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > srcIntervals;
+   getCoarseBlockCommIntervals(fineReceiver->getId(), dir, srcField, srcIntervals);
+
+   std::vector< std::pair< Direction, CellInterval > > dstIntervals;
+   getFineBlockCommIntervals(fineReceiver->getId(), stencil::inverseDir[dir], dstField, dstIntervals);
+
+   WALBERLA_ASSERT_EQUAL(srcIntervals.size(), dstIntervals.size())
+
+   for(size_t index = 0; index < srcIntervals.size(); index++)
+   {
+      CellInterval srcInterval = srcIntervals[index].second;
+
+      Direction const unpackDir      = dstIntervals[index].first;
+      CellInterval dstInterval = dstIntervals[index].second;
+
+      uint_t packSize      = kernels_.size(srcInterval);
+
+#ifndef NDEBUG
+      Direction const packDir        = srcIntervals[index].first;
+      WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
+      uint_t unpackSize = kernels_.redistributeSize(dstInterval);
+      WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+      // TODO: This is a dirty workaround. Code-generate direct redistribution!
+      // NOTE(review): gpuMalloc/gpuFree per interval per call is costly; prefer the
+      // buffered overload below, or a persistent scratch buffer, on hot paths.
+      unsigned char *buffer;
+      WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize))
+      kernels_.packAll(srcField, srcInterval, buffer);
+      kernels_.unpackRedistribute(dstField, dstInterval, buffer, unpackDir);
+      WALBERLA_GPU_CHECK(gpuFree(buffer))
+   }
+}
+
+/// Local coarse-to-fine communication using a caller-provided GPU buffer and
+/// stream: pack and redistribute run asynchronously on `stream`, avoiding the
+/// per-interval allocation of the unbuffered overload.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
+   const Block* coarseSender, Block* fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream)
+{
+   auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > srcIntervals;
+   getCoarseBlockCommIntervals(fineReceiver->getId(), dir, srcField, srcIntervals);
+
+   std::vector< std::pair< Direction, CellInterval > > dstIntervals;
+   getFineBlockCommIntervals(fineReceiver->getId(), stencil::inverseDir[dir], dstField, dstIntervals);
+
+   WALBERLA_ASSERT_EQUAL(srcIntervals.size(), dstIntervals.size())
+
+   for(size_t index = 0; index < srcIntervals.size(); index++)
+   {
+      CellInterval srcInterval = srcIntervals[index].second;
+
+      Direction const unpackDir      = dstIntervals[index].first;
+      CellInterval dstInterval = dstIntervals[index].second;
+
+      uint_t packSize      = kernels_.size(srcInterval);
+
+#ifndef NDEBUG
+      Direction const packDir        = srcIntervals[index].first;
+      WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
+      uint_t unpackSize = kernels_.redistributeSize(dstInterval);
+      WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+      auto bufferPtr = buffer.advanceNoResize(packSize);
+      kernels_.packAll(srcField, srcInterval, bufferPtr, stream)
+      kernels_.unpackRedistribute(dstField, dstInterval, bufferPtr, unpackDir, stream);
+   }
+}
+
+/// Builds deferred functors for local coarse-to-fine communication, one per
+/// interval pair. Buffer space is reserved now (advanceNoResize) and the raw
+/// pointer is captured by value, so the buffer must stay alive and unmodified
+/// until the functors have run.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getLocalCoarseToFineCommFunction(
+   std::vector< VoidFunction >& commFunctions,
+   const Block* coarseSender, Block* fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer)
+{
+   auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > srcIntervals;
+   getCoarseBlockCommIntervals(fineReceiver->getId(), dir, srcField, srcIntervals);
+
+   std::vector< std::pair< Direction, CellInterval > > dstIntervals;
+   getFineBlockCommIntervals(fineReceiver->getId(), stencil::inverseDir[dir], dstField, dstIntervals);
+
+   WALBERLA_ASSERT_EQUAL(srcIntervals.size(), dstIntervals.size())
+
+   for(size_t index = 0; index < srcIntervals.size(); index++)
+   {
+      CellInterval srcInterval = srcIntervals[index].second;
+
+      Direction const unpackDir      = dstIntervals[index].first;
+      CellInterval dstInterval = dstIntervals[index].second;
+
+      uint_t packSize      = kernels_.size(srcInterval);
+
+#ifndef NDEBUG
+      Direction const packDir        = srcIntervals[index].first;
+      WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
+      uint_t unpackSize = kernels_.redistributeSize(dstInterval);
+      WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+      auto bufferPtr = buffer.advanceNoResize(packSize);
+
+      auto commFunction = [this, srcField, srcInterval, bufferPtr, dstField, dstInterval, unpackDir](gpuStream_t gpuStream)
+      {
+         kernels_.packAll(srcField, srcInterval, bufferPtr, gpuStream);
+         kernels_.unpackRedistribute(dstField, dstInterval, bufferPtr, unpackDir, gpuStream);
+      };
+      commFunctions.emplace_back(commFunction);
+   }
+}
+
+
+
+/***********************************************************************************************************************
+ *                                          Fine to Coarse Communication                                               *
+ **********************************************************************************************************************/
+
+/// Zeroes the coalescence regions of the coarse block in every communication
+/// direction that has finer neighbor blocks, preparing them to accumulate
+/// fine-to-coarse contributions. Runs on the given GPU stream.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::prepareCoalescence(Block* coarseReceiver, gpuStream_t gpuStream)
+{
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+
+   for(auto it = CommunicationStencil::beginNoCenter(); it != CommunicationStencil::end(); ++it){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*it);
+      // Only directions bordering finer blocks receive coalesced data.
+      if(coarseReceiver->neighborhoodSectionHasSmallerBlocks(nSecIdx)){
+         CellInterval ci;
+         dstField->getSliceBeforeGhostLayer(*it, ci, 1);
+         kernels_.zeroCoalescenceRegion(dstField, ci, *it, gpuStream);
+      }
+   }
+}
+
+/// Unpacks (coalesces) fine-to-coarse data from the buffer into the coarse
+/// receiver's interval corresponding to the sending fine block and direction.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::unpackDataFineToCoarse(
+   Block* coarseReceiver, const walberla::BlockID& fineSender, walberla::stencil::Direction dir,
+   GpuBuffer_T & buffer)
+{
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+
+   CellInterval ci = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender, dir, dstField);
+   uint_t size = kernels_.size(ci, dir);
+   unsigned char* bufferPtr = buffer.advanceNoResize(size);
+   kernels_.unpackCoalescence(dstField, ci, bufferPtr, dir);
+}
+
+/// Local (same-process) fine-to-coarse communication without a caller-provided
+/// buffer: packs the fine block's masked ghost data (partial coalescence) and
+/// coalesces it into the coarse block's target interval.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalFineToCoarse(
+   const Block* fineSender, Block* coarseReceiver, walberla::stencil::Direction dir)
+{
+   auto varFineSender = const_cast< Block * >(fineSender);
+   auto srcField   = varFineSender->getData< PdfField_T >(pdfFieldID_);
+   auto srcCommData   = varFineSender->getData< CommData_T >(commDataID_);
+   // Mask selecting which fine-cell populations participate in the coalescence.
+   PartialCoalescenceMaskFieldGPU * maskField = &(srcCommData->getMaskFieldGPU());
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+   Direction invDir = stencil::inverseDir[dir];
+
+   CellInterval srcInterval;
+   srcField->getGhostRegion(dir, srcInterval, 2);
+   uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
+
+   CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
+                                                                invDir, dstField);
+
+#ifndef NDEBUG
+   uint_t unpackSize = kernels_.size(dstInterval, invDir);
+   WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+   // TODO: This is a dirty workaround. Code-generate direct redistribution!
+   // NOTE(review): allocates and frees a device buffer on every call; prefer the
+   // buffered overload on hot paths.
+   unsigned char *buffer;
+   WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize))
+   kernels_.packPartialCoalescence(srcField, maskField, srcInterval, buffer, dir);
+   kernels_.unpackCoalescence(dstField, dstInterval, buffer, invDir);
+   WALBERLA_GPU_CHECK(gpuFree(buffer))
+}
+
+
+/// Local fine-to-coarse communication using a caller-provided GPU buffer and
+/// stream: partial-coalescence pack and coalescence unpack run asynchronously,
+/// avoiding the per-call device allocation of the unbuffered overload.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalFineToCoarse(
+   const Block* fineSender, Block* coarseReceiver, walberla::stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream)
+{
+   auto varFineSender = const_cast< Block * >(fineSender);
+   auto srcField   = varFineSender->getData< PdfField_T >(pdfFieldID_);
+   auto srcCommData   = varFineSender->getData< CommData_T >(commDataID_);
+   PartialCoalescenceMaskFieldGPU * maskField = &(srcCommData->getMaskFieldGPU());
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+   Direction invDir = stencil::inverseDir[dir];
+
+   CellInterval srcInterval;
+   srcField->getGhostRegion(dir, srcInterval, 2);
+   uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
+
+   CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
+                                                                invDir, dstField);
+
+#ifndef NDEBUG
+   uint_t unpackSize = kernels_.size(dstInterval, invDir);
+   WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+   auto bufferPtr = buffer.advanceNoResize(packSize);
+   kernels_.packPartialCoalescence(srcField, maskField, srcInterval, bufferPtr, dir, stream);
+   kernels_.unpackCoalescence(dstField, dstInterval, bufferPtr, invDir, stream);
+}
+
+/// Builds a deferred functor for local fine-to-coarse communication. Buffer space
+/// is reserved now and the raw pointer captured by value, so the buffer must
+/// outlive the functor's execution.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getLocalFineToCoarseCommFunction(
+   std::vector< VoidFunction >& commFunctions,
+   const Block* fineSender, Block* coarseReceiver, walberla::stencil::Direction dir, GpuBuffer_T & buffer)
+{
+   auto varFineSender = const_cast< Block * >(fineSender);
+   auto srcField   = varFineSender->getData< PdfField_T >(pdfFieldID_);
+   auto srcCommData   = varFineSender->getData< CommData_T >(commDataID_);
+   PartialCoalescenceMaskFieldGPU * maskField = &(srcCommData->getMaskFieldGPU());
+   auto dstField = coarseReceiver->getData<PdfField_T>(pdfFieldID_);
+   Direction invDir = stencil::inverseDir[dir];
+
+   CellInterval srcInterval;
+   srcField->getGhostRegion(dir, srcInterval, 2);
+   uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
+
+   CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
+                                                                invDir, dstField);
+
+#ifndef NDEBUG
+   uint_t unpackSize = kernels_.size(dstInterval, invDir);
+   WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+#endif
+
+   auto bufferPtr = buffer.advanceNoResize(packSize);
+   auto commFunction = [this, srcField, maskField, srcInterval, bufferPtr, dir, dstField, dstInterval, invDir](gpuStream_t gpuStream)
+   {
+      kernels_.packPartialCoalescence(srcField, maskField, srcInterval, bufferPtr, dir, gpuStream);
+      kernels_.unpackCoalescence(dstField, dstInterval, bufferPtr, invDir, gpuStream);
+   };
+   commFunctions.emplace_back(commFunction);
+}
+
+
+/// Returns the byte size required to pack the equal-level send data of `sender`
+/// in direction dir (same interval choice as packDataEqualLevelImpl).
+template< typename PdfField_T>
+uint_t NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::sizeEqualLevelSend( const Block * sender, stencil::Direction dir)
+{
+   auto field = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1;
+   field->getSliceBeforeGhostLayer(dir, ci, gls, false);
+   return kernels_.size(ci, dir);
+}
+
+
+
+/// Returns the total byte size of the coarse-to-fine send towards `fineReceiver`
+/// in direction dir, summed over all communication intervals.
+template< typename PdfField_T>
+uint_t NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::sizeCoarseToFineSend ( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir)
+{
+   auto field = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
+
+   std::vector< std::pair< Direction, CellInterval > > intervals;
+   getCoarseBlockCommIntervals(fineReceiver, dir, field, intervals);
+
+   uint_t size = 0;
+
+   for (auto t : intervals)
+   {
+      CellInterval ci          = t.second;
+      size += kernels_.size(ci);
+   }
+   // A coarse-to-fine send must always transfer something.
+   WALBERLA_ASSERT_GREATER(size, 0)
+   return size;
+}
+
+
+
+/// Returns the byte size of the fine-to-coarse (partial coalescence) send of
+/// `sender` in direction dir, based on its two-layer ghost region.
+template< typename PdfField_T>
+uint_t NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::sizeFineToCoarseSend ( const Block * sender, stencil::Direction dir)
+{
+   auto field = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+
+   CellInterval ci;
+   field->getGhostRegion(dir, ci, 2);
+   return kernels_.partialCoalescenceSize(ci, dir);
+}
+
+
+
+/// Packs the fine sender's masked ghost data (partial coalescence) for direction
+/// dir into the GPU buffer. The coarse receiver ID is not needed: the packed
+/// interval depends only on the sender's ghost region.
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::packDataFineToCoarseImpl(
+   const Block* fineSender, const walberla::BlockID& /*coarseReceiver*/, walberla::stencil::Direction dir,
+   GpuBuffer_T & buffer) const
+{
+   auto varBlock = const_cast< Block* >(fineSender);
+   auto srcField   = varBlock->getData< PdfField_T >(pdfFieldID_);
+   auto commData  = varBlock->getData< CommData_T >(commDataID_);
+   PartialCoalescenceMaskFieldGPU * maskField = &(commData->getMaskFieldGPU());
+
+   CellInterval ci;
+   srcField->getGhostRegion(dir, ci, 2);
+   uint_t size = kernels_.partialCoalescenceSize(ci, dir);
+   auto bufferPtr = buffer.advanceNoResize(size);
+   kernels_.packPartialCoalescence(srcField, maskField, ci, bufferPtr, dir);
+}
+
+/***********************************************************************************************************************
+ *                                                  Helper Functions                                                   *
+ **********************************************************************************************************************/
+
+/// Determines, per dimension, on which half of its parent the fine block lies
+/// (-1 lower / +1 upper) for dimensions orthogonal to `dir`; dimensions aligned
+/// with `dir` get 0. The branch ID's bits 0/1/2 encode the fine block's octant
+/// in x/y/z. For 2D stencils the z component is always 0.
+template< typename PdfField_T>
+inline Vector3< cell_idx_t >
+   NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getNeighborShift(const BlockID& fineBlock,
+                                                                     stencil::Direction dir) const
+{
+   // dir: direction from coarse to fine block, or vice versa
+   Vector3< cell_idx_t > shift;
+
+   uint_t const branchId = fineBlock.getBranchId();
+
+   shift[0] = (stencil::cx[dir] == 0) ? (((branchId & uint_t(1)) == uint_t(0)) ? cell_idx_t(-1) : cell_idx_t(1)) :
+              cell_idx_t(0);
+   shift[1] = (stencil::cy[dir] == 0) ? (((branchId & uint_t(2)) == uint_t(0)) ? cell_idx_t(-1) : cell_idx_t(1)) :
+              cell_idx_t(0);
+   shift[2] = (Stencil::D == uint_t(3)) ?
+              ((stencil::cz[dir] == 0) ? (((branchId & uint_t(4)) == uint_t(0)) ? cell_idx_t(-1) : cell_idx_t(1)) :
+               cell_idx_t(0)) :
+              cell_idx_t(0);
+
+   return shift;
+}
+
+/**
+ * Returns the part of a cell interval's hull of given width in direction dirVec.
+ * @param ci        The original cell interval
+ * @param dirVec    Direction Vector
+ * @param width     Width of the hull
+ * @return          Interval forming the part of the hull
+ */
+template< typename PdfField_T>
+inline CellInterval NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::intervalHullInDirection(
+   const CellInterval& ci, const Vector3< cell_idx_t > dirVec, cell_idx_t width) const
+{
+   CellInterval result(ci);
+   for (uint_t i = 0; i < Stencil::D; i++)
+   {
+      if (dirVec[i] == 1)
+      {
+         // Place the interval directly above ci's upper face in dimension i.
+         result.min()[i] = result.max()[i] + cell_idx_t(1);
+         result.max()[i] += width;
+      }
+      if (dirVec[i] == -1)
+      {
+         // Place the interval directly below ci's lower face in dimension i.
+         result.max()[i] = result.min()[i] - cell_idx_t(1);
+         result.min()[i] -= width;
+      }
+   }
+
+   return result;
+}
+
+/**
+ * For edge or corner directions, checks if a coarser block is part of the respective edge or corner intersection.
+ * @param block The local block
+ * @param dir   The direction to check
+ * @return      `true`  if dir is an edge or corner direction skipping through a coarser block.
+ */
+template< typename PdfField_T>
+inline bool NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::skipsThroughCoarseBlock(
+   const Block* block, const Direction dir) const
+{
+   Vector3< cell_idx_t > dirVec(stencil::cx[dir], stencil::cy[dir], stencil::cz[dir]);
+   bool coarseBlockFound = false;
+   // Check every sub-direction of dir; cancel the iteration as soon as a larger
+   // (coarser) neighbor block is found.
+   forEachSubdirectionCancel(dirVec, [&](Vector3< cell_idx_t > subdir) {
+     coarseBlockFound =
+        coarseBlockFound || block->neighborhoodSectionHasLargerBlock(
+           blockforest::getBlockNeighborhoodSectionIndex(subdir[0], subdir[1], subdir[2]));
+     return !coarseBlockFound;
+   });
+
+   return coarseBlockFound;
+}
+
+/**
+ * For coarse-to-fine and fine-to-coarse communication, returns a list of pairs (Direction, CellInterval)
+ * mapping sub-directions of the communication direction to cell intervals on the coarse block interior
+ * whose data must be communicated <i>as if</i> communicating in those sub-directions.
+ * @param fineBlockID   ID of the fine block
+ * @param dir           Direction from the coarse to the fine block
+ * @param field         Pointer to the PDF field on the coarse block
+ * @param intervals     Vector that will be filled with the computed intervals
+ */
+template< typename PdfField_T>
+inline void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getCoarseBlockCommIntervals(
+   const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+   std::vector< std::pair< Direction, CellInterval > >& intervals) const
+{
+   Vector3< cell_idx_t > shift = getNeighborShift(fineBlockID, dir);
+
+   CellInterval mainSlice;
+   field->getSliceBeforeGhostLayer(dir, mainSlice, 1, false);
+
+   // In all directions, restrict the slice to the lower or upper half, depending on neighbor shift
+   for (uint_t i = 0; i != Stencil::D; ++i)
+   {
+      if (shift[i] == cell_idx_t(-1))
+      {
+         // Halving requires an even extent in this dimension.
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.max()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2)) - cell_idx_t(1);
+      }
+      if (shift[i] == cell_idx_t(1))
+      {
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.min()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2));
+      }
+   }
+
+   intervals.emplace_back(dir, mainSlice);
+
+   Vector3< cell_idx_t > const commDirVec{ stencil::cx[dir], stencil::cy[dir], stencil::cz[dir] };
+
+   // Get extended slices in all tangential directions for the diagonal part of communication
+   forEachSubdirection(-shift, [&](Vector3< cell_idx_t > t) {
+     CellInterval hullInterval = intervalHullInDirection(mainSlice, t, cell_idx_t(1));
+     Direction subCommDir      = stencil::vectorToDirection(commDirVec - t);
+     if(CommunicationStencil::containsDir(subCommDir)){
+        intervals.emplace_back(subCommDir, hullInterval);
+     }
+   });
+}
+
+/**
+ * For coarse-to-fine and fine-to-coarse communication, returns a list of pairs (Direction, CellInterval)
+ * mapping sub-directions of the communication direction to cell intervals on the fine block whose data must
+ * be communicated <i>as if</i> communicating in those sub-directions.
+ * @param fineBlockID   ID of the fine block
+ * @param dir           Direction from the fine to the coarse block
+ * @param field         Pointer to the PDF Field on the fine block
+ * @param intervals     Vector that will be filled with the computed intervals
+ */
+template< typename PdfField_T>
+inline void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getFineBlockCommIntervals(
+   const BlockID& fineBlockID, const Direction dir, const PdfField_T* field,
+   std::vector< std::pair< Direction, CellInterval > >& intervals) const
+{
+   Vector3< cell_idx_t > shift = getNeighborShift(fineBlockID, dir);
+
+   // Fine side uses the full two-layer ghost region in dir as the main interval.
+   CellInterval mainSlice;
+   field->getGhostRegion(dir, mainSlice, 2, false);
+   intervals.emplace_back(dir, mainSlice);
+
+   Vector3< cell_idx_t > const commDirVec{ stencil::cx[dir], stencil::cy[dir], stencil::cz[dir] };
+
+   // Extended hull intervals of width 2 cover the diagonal sub-directions.
+   forEachSubdirection(-shift, [&](Vector3< cell_idx_t > t) {
+     CellInterval hullInterval = intervalHullInDirection(mainSlice, t, cell_idx_t(2));
+     Direction subCommDir      = stencil::vectorToDirection(commDirVec + t);
+     if(CommunicationStencil::containsDir(subCommDir)){
+        intervals.emplace_back(subCommDir, hullInterval);
+     }
+   });
+}
+/**
+ * Checks whether or not the block with ID `neighborID` is a neighbor of `block` in direction `dir`.
+ */
+template< typename PdfField_T>
+bool NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::areNeighborsInDirection(
+   const Block* block, const BlockID& neighborID, const Vector3< cell_idx_t> dirVec) const
+{
+   // Look up the neighborhood section addressed by dirVec and report whether any
+   // block registered in that section carries the requested ID.
+   const uint_t sectionIndex = blockforest::getBlockNeighborhoodSectionIndex(dirVec[0], dirVec[1], dirVec[2]);
+   const uint_t sectionSize  = block->getNeighborhoodSectionSize(sectionIndex);
+
+   for (uint_t neighborIdx = uint_t(0); neighborIdx != sectionSize; ++neighborIdx)
+   {
+      if (block->getNeighborId(sectionIndex, neighborIdx) == neighborID) return true;
+   }
+   return false;
+}
+
+/// Computes the interval on the coarse block into which data from the fine block
+/// `fineBlockID` is coalesced for communication direction dir. Handles the
+/// asymmetric case where, e.g. due to periodicity, the fine block is not a direct
+/// neighbor in dir itself but only in one of dir's sub-directions.
+template< typename PdfField_T>
+CellInterval NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::getCoarseBlockCoalescenceInterval(
+   const Block* coarseBlock, const BlockID& fineBlockID, Direction dir, const PdfField_T* field) const
+{
+   Direction mainDir(dir);
+   Vector3< cell_idx_t > commDirVec(stencil::cx[dir], stencil::cy[dir], stencil::cz[dir]);
+   Vector3< cell_idx_t > mainDirVec(commDirVec);
+   bool isAsymmetric = !areNeighborsInDirection(coarseBlock, fineBlockID, commDirVec);
+
+   // If asymmetric, find the main subdirection
+   if(isAsymmetric){
+      mainDirVec = Vector3< cell_idx_t >(0);
+      forEachSubdirection(commDirVec, [&](Vector3< cell_idx_t > subdirVec){
+         if(areNeighborsInDirection(coarseBlock, fineBlockID, subdirVec)){
+            // -dir is one main communication direction from F to C, but, due to periodicity,
+            // it might not be the only one. Find the main comm direction from the subdirections
+            // that is largest in the 1-norm.
+            if(subdirVec.sqrLength() > mainDirVec.sqrLength()) mainDirVec = subdirVec;
+         }
+      });
+      mainDir = stencil::vectorToDirection(mainDirVec);
+   }
+
+   Vector3< cell_idx_t > shift = getNeighborShift(fineBlockID, mainDir);
+
+   CellInterval mainSlice;
+   field->getSliceBeforeGhostLayer(mainDir, mainSlice, 1, false);
+
+   // In all directions, restrict the slice to the lower or upper half, depending on neighbor shift
+   for (uint_t i = 0; i != Stencil::D; ++i)
+   {
+      if (shift[i] == cell_idx_t(-1))
+      {
+         // Halving requires an even extent in this dimension.
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.max()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2)) - cell_idx_t(1);
+      }
+      if (shift[i] == cell_idx_t(1))
+      {
+         WALBERLA_ASSERT_EQUAL(mainSlice.size(i) & 1, 0)
+         mainSlice.min()[i] = mainSlice.min()[i] + cell_idx_c(mainSlice.size(i) / uint_t(2));
+      }
+   }
+
+   CellInterval commSlice(mainSlice);
+
+   // If asymmetric, find coalescence slice as hull of main slice
+   if(isAsymmetric){
+      commSlice = intervalHullInDirection(mainSlice, mainDirVec - commDirVec, 1);
+   }
+
+   return commSlice;
+}
+
+} // walberla::lbm_generated
diff --git a/src/lbm_generated/gpu/UniformGeneratedGPUPdfPackInfo.h b/src/lbm_generated/gpu/UniformGeneratedGPUPdfPackInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..894eb38034881feeda40c1a3d051455cbe98e173
--- /dev/null
+++ b/src/lbm_generated/gpu/UniformGeneratedGPUPdfPackInfo.h
@@ -0,0 +1,272 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file UniformGeneratedGPUPdfPackInfo.h
+//! \ingroup lbm
+//! \author Markus Holzer <markus.holzer@fau.de>
+//! \brief Class Template for Lattice Boltzmann PDF Pack Infos using code-generated kernels
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+#include "lbm/field/PdfField.h"
+
+#include "stencil/Directions.h"
+
+namespace walberla
+{
+using gpu::GeneratedGPUPackInfo;
+
+namespace lbm_generated
+{
+using stencil::Direction;
+
+namespace internal
+{
+/*
+ * Base Template for Packing Kernels Wrapper. This wrapper is required for passing the time step to
+ * kernels generated for in-place streaming patterns. The generated code should not be templated.
+ *
+ * NOTE(review): this primary template only documents the required interface; only the two
+ * specializations below (inplace == false / true) are ever used. The "= 0" initializers on
+ * non-virtual members are unusual — confirm all supported compilers accept this pattern.
+ */
+template< typename PdfField_T, bool inplace >
+class UniformPackingGPUKernelsWrapper
+{
+ public:
+   // Pack PDFs of all stencil directions from `ci` of `srcField` into `outBuffer` on `stream`.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, gpuStream_t stream) const  = 0;
+   // Unpack PDFs of all stencil directions from `inBuffer` into `ci` of `dstField`.
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, gpuStream_t stream) const = 0;
+   // Copy all PDFs from `srcInterval` of `srcField` directly into `dstInterval` of `dstField` (no buffer).
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField, CellInterval& dstInterval,
+                     gpuStream_t stream) const                                                               = 0;
+
+   // Direction-wise variants: transfer only the PDF subset associated with direction `dir`.
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir,
+                      gpuStream_t stream) const                                                = 0;
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir,
+                        gpuStream_t stream) const                                              = 0;
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, Direction dir, gpuStream_t stream) const = 0;
+
+   // Buffer size required for `ci`, as reported by the generated kernels (per-direction / all directions).
+   uint_t size(CellInterval& ci, Direction dir) const = 0;
+   uint_t size(CellInterval& ci) const                = 0;
+};
+
+/*
+ * Template Specialization for two-fields patterns, with trivial method wrappers.
+ * No time step information is required, so each call is forwarded unchanged to the
+ * generated PackingKernels implementation.
+ */
+template< typename PdfField_T >
+class UniformPackingGPUKernelsWrapper< PdfField_T, false >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+
+   // Pack PDFs of all stencil directions from `ci` of `srcField` into `outBuffer` on `stream`.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, gpuStream_t stream) const
+   {
+      kernels_.packAll(srcField, ci, outBuffer, stream);
+   }
+
+   // Unpack PDFs of all stencil directions from `inBuffer` into `ci` of `dstField`.
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, gpuStream_t stream) const
+   {
+      kernels_.unpackAll(dstField, ci, inBuffer, stream);
+   }
+
+   // Buffer-free copy of all PDFs between two fields on the same process.
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField, CellInterval& dstInterval,
+                     gpuStream_t stream) const
+   {
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval, stream);
+   }
+
+   // Direction-wise variants: transfer only the PDF subset associated with `dir`.
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir,
+                      gpuStream_t stream) const
+   {
+      kernels_.packDirection(srcField, ci, outBuffer, dir, stream);
+   }
+
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir,
+                        gpuStream_t stream) const
+   {
+      kernels_.unpackDirection(dstField, ci, inBuffer, dir, stream);
+   }
+
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, Direction dir, gpuStream_t stream) const
+   {
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, stream);
+   }
+
+   // Buffer size required for `ci`, as reported by the generated kernels.
+   uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); }
+   uint_t size(CellInterval& ci) const { return kernels_.size(ci); }
+
+ private:
+   PackingKernels_T kernels_;
+};
+
+/*
+ * Template Specialization for in-place streaming patterns: the current time step is
+ * read from the PDF field (getTimestep()) and forwarded to every generated kernel,
+ * which needs it to resolve the in-place storage layout.
+ */
+template< typename PdfField_T >
+class UniformPackingGPUKernelsWrapper< PdfField_T, true >
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+
+   // Pack PDFs of all stencil directions from `ci` of `srcField` into `outBuffer` on `stream`.
+   void packAll(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, gpuStream_t stream) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packAll(srcField, ci, outBuffer, timestep, stream);
+   }
+
+   // Unpack PDFs of all stencil directions from `inBuffer` into `ci` of `dstField`.
+   void unpackAll(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, gpuStream_t stream) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackAll(dstField, ci, inBuffer, timestep, stream);
+   }
+
+   // Buffer-free copy of all PDFs; both fields must be at the same time step.
+   void localCopyAll(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField, CellInterval& dstInterval,
+                     gpuStream_t stream) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(timestep, dstField->getTimestep())
+      kernels_.localCopyAll(srcField, srcInterval, dstField, dstInterval, timestep, stream);
+   }
+
+   // Direction-wise variants: transfer only the PDF subset associated with `dir`.
+   void packDirection(PdfField_T* srcField, CellInterval& ci, unsigned char* outBuffer, Direction dir,
+                      gpuStream_t stream) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      kernels_.packDirection(srcField, ci, outBuffer, dir, timestep, stream);
+   }
+
+   void unpackDirection(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, Direction dir,
+                        gpuStream_t stream) const
+   {
+      uint8_t timestep = dstField->getTimestep();
+      kernels_.unpackDirection(dstField, ci, inBuffer, dir, timestep, stream);
+   }
+
+   void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
+                           CellInterval& dstInterval, Direction dir, gpuStream_t stream) const
+   {
+      uint8_t timestep = srcField->getTimestep();
+      WALBERLA_ASSERT_EQUAL(timestep, dstField->getTimestep())
+      kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream);
+   }
+
+   // Buffer size required for `ci`, as reported by the generated kernels.
+   uint_t size(CellInterval& ci, Direction dir) const { return kernels_.size(ci, dir); }
+   uint_t size(CellInterval& ci) const { return kernels_.size(ci); }
+
+ private:
+   PackingKernels_T kernels_;
+};
+} // namespace internal
+
+/**
+ * Pack Info class template for lattice Boltzmann PDF fields. Relies on a code-generated
+ * class providing kernel implementations for packing, unpacking and local copying of data.
+ *
+ * This template relies on a PackingKernels implementation generated by lbmpy_walberla.packing_kernels.
+ * The code generated part provides the kernels for transferring data between communication buffers
+ * and fields. The iteration slices are constructed by this class.
+ *
+ * The code-generated substructure enables the usage of arbitrary, in particular in-place streaming
+ * patterns.
+ *
+ * @tparam  PdfField_T Type of the PDF field; its LatticeStorageSpecification must provide a
+ *          PackKernels implementation generated using `lbmpy_walberla.generate_packing_kernels`.
+ *
+ * \ingroup lbm
+ */
+template< typename PdfField_T >
+class UniformGeneratedGPUPdfPackInfo : public GeneratedGPUPackInfo
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using PackingKernels_T              = typename LatticeStorageSpecification_T::PackKernels;
+   using Stencil                       = typename LatticeStorageSpecification_T::Stencil;
+
+   /// \param pdfFieldID       BlockDataID of the PDF field to communicate
+   /// \param cellLayersToSend number of outermost cell layers to pack/unpack (stored in ghostLayersToSend_)
+   /// \param sendAll          if true, all PDFs are transferred (packAll); otherwise only the
+   ///                         direction-wise subset handled by packDirection
+   UniformGeneratedGPUPdfPackInfo(const BlockDataID pdfFieldID, cell_idx_t cellLayersToSend = 1, bool sendAll = false)
+      : pdfFieldID_(pdfFieldID), ghostLayersToSend_(cellLayersToSend), sendAll_(sendAll)
+   {}
+
+   void pack(stencil::Direction dir, unsigned char* buffer, IBlock* block, gpuStream_t stream) override;
+   void communicateLocal(stencil::Direction dir, const IBlock* sender, IBlock* receiver, gpuStream_t stream) override;
+   void unpack(stencil::Direction dir, unsigned char* buffer, IBlock* block, gpuStream_t stream) override;
+   uint_t size(stencil::Direction dir, IBlock* block) override;
+
+ private:
+   const BlockDataID pdfFieldID_;
+   // Wrapper selects the two-fields or in-place kernel variant at compile time.
+   internal::UniformPackingGPUKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_;
+   cell_idx_t ghostLayersToSend_;
+   bool sendAll_;
+};
+
+/// Unpacks PDF data received from a neighbor into the ghost region of `block` in direction `dir`.
+template< typename PdfField_T >
+void UniformGeneratedGPUPdfPackInfo< PdfField_T >::unpack(stencil::Direction dir, unsigned char* buffer, IBlock* block,
+                                                          gpuStream_t stream)
+{
+   auto field = block->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   // Target region: the ghost layers of this block in direction `dir`.
+   field->getGhostRegion(dir, ci, ghostLayersToSend_, false);
+
+   if (sendAll_) { kernels_.unpackAll(field, ci, buffer, stream); }
+   else { kernels_.unpackDirection(field, ci, buffer, dir, stream); }
+}
+
+/// Packs the outermost interior cell layers of `block` in direction `dir` into `buffer`.
+template< typename PdfField_T >
+void UniformGeneratedGPUPdfPackInfo< PdfField_T >::pack(stencil::Direction dir, unsigned char* buffer, IBlock* block,
+                                                        gpuStream_t stream)
+{
+   // `block` is already a non-const IBlock*, so the const_cast used previously was redundant.
+   auto field = block->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   // Source region: the interior slice directly before the ghost layer in direction `dir`.
+   field->getSliceBeforeGhostLayer(dir, ci, ghostLayersToSend_, false);
+
+   if (sendAll_) { kernels_.packAll(field, ci, buffer, stream); }
+   else { kernels_.packDirection(field, ci, buffer, dir, stream); }
+}
+
+/// Copies PDF data directly from `sender` to `receiver` (both on this process), bypassing buffers.
+template< typename PdfField_T >
+void UniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocal(stencil::Direction dir, const IBlock* sender,
+                                                                    IBlock* receiver, gpuStream_t stream)
+{
+   // const_cast is needed here: the interface passes the sender as const, but getData()
+   // requires a mutable block pointer.
+   auto srcField = const_cast< IBlock* >(sender)->getData< PdfField_T >(pdfFieldID_);
+   auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+   CellInterval srcRegion;
+   CellInterval dstRegion;
+   // Source: sender's interior slice before the ghost layer in `dir`;
+   // destination: receiver's ghost region in the inverse direction.
+   srcField->getSliceBeforeGhostLayer(dir, srcRegion, ghostLayersToSend_, false);
+   dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, ghostLayersToSend_, false);
+
+   if (sendAll_) { kernels_.localCopyAll(srcField, srcRegion, dstField, dstRegion, stream); }
+   else { kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir, stream); }
+}
+
+/// Returns the communication buffer size required for direction `dir` of `block`.
+template< typename PdfField_T >
+uint_t UniformGeneratedGPUPdfPackInfo< PdfField_T >::size(stencil::Direction dir, IBlock* block)
+{
+   auto field = block->getData< PdfField_T >(pdfFieldID_);
+   CellInterval ci;
+   // Use the configured number of ghost layers so the reported size matches the regions
+   // actually used by pack()/unpack(). Previously a literal 1 was used here, which
+   // under-reports the buffer size whenever ghostLayersToSend_ != 1.
+   field->getGhostRegion(dir, ci, ghostLayersToSend_, false);
+
+   // When sendAll_ is set, pack() transfers all PDFs (packAll), so the size must be
+   // queried accordingly; otherwise only the direction-wise subset is transferred.
+   uint_t elementsPerCell = sendAll_ ? kernels_.size(ci) : kernels_.size(ci, dir);
+   return elementsPerCell;
+}
+
+} // namespace lbm_generated
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/refinement/BasicRecursiveTimeStep.h b/src/lbm_generated/refinement/BasicRecursiveTimeStep.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b0a2a7ece5768fb071776e6aa9d0ea05dc9b797
--- /dev/null
+++ b/src/lbm_generated/refinement/BasicRecursiveTimeStep.h
@@ -0,0 +1,97 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BasicRecursiveTimeStep.h
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/communication/NonUniformBufferedScheme.h"
+
+#include "lbm/field/PdfField.h"
+#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+namespace walberla {
+
+using blockforest::communication::NonUniformBufferedScheme;
+
+namespace lbm_generated {
+
+/**
+ * Recursive LBM time step for grids with refinement: a coarse-level step triggers
+ * two subcycles on each finer level. Can either be executed directly via operator()
+ * or be registered stage-by-stage in a SweepTimeloop.
+ *
+ * @tparam PdfField_T PDF field type; its LatticeStorageSpecification provides the generated storage specification
+ * @tparam SweepCollection_T LBM SweepCollection (must be able to call stream, collide, streamCollide and streamOnlyNoAdvancement)
+ * @tparam BoundaryCollection_T LBM Boundary collection (Functor that runs all boundary kernels at call)
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T>
+class BasicRecursiveTimeStep
+{
+ public:
+   using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
+   using Stencil = typename LatticeStorageSpecification_T::Stencil;
+   using CommunicationStencil = typename LatticeStorageSpecification_T::CommunicationStencil;
+   using CommScheme = NonUniformBufferedScheme< CommunicationStencil >;
+   using PackInfo = lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >;
+
+   BasicRecursiveTimeStep(std::shared_ptr< StructuredBlockForest > & sbfs,
+                          const BlockDataID & pdfFieldId, SweepCollection_T & sweepCollection, BoundaryCollection_T & boundaryCollection,
+                          std::shared_ptr< CommScheme > & commScheme, std::shared_ptr< PackInfo > & pdfFieldPackInfo):
+      sbfs_(sbfs), pdfFieldId_(pdfFieldId), pdfFieldPackInfo_(pdfFieldPackInfo), commScheme_(commScheme),
+      sweepCollection_(sweepCollection), boundaryCollection_(boundaryCollection)
+      {
+#ifndef NDEBUG
+      for (auto& block : *sbfs)
+         WALBERLA_ASSERT(block.isDataOfType<PdfField_T>(pdfFieldId_), "Template parameter PdfField_T is of different type than BlockDataID pdfFieldId that is provided as constructor argument")
+#endif
+      // Deepest (finest) refinement level present in the block forest.
+      maxLevel_ = sbfs->getDepth();
+
+      // Cache the block lists per level so recursion does not query the forest every step.
+      for (uint_t level = 0; level <= maxLevel_; level++)
+      {
+         std::vector<Block *> blocks;
+         sbfs->getBlocks(blocks, level);
+         blocks_.push_back(blocks);
+      }
+     };
+
+   // Runs one full recursive time step starting at the coarsest level.
+   void operator() () { timestep(0); };
+   // Registers the equivalent stage sequence as individual timeloop functions.
+   void addRefinementToTimeLoop(timeloop::SweepTimeloop & timeloop, uint_t level=0);
+
+ private:
+   void timestep(uint_t level);
+   void ghostLayerPropagation(Block * block);
+   std::function<void()> executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation=false);
+   std::function<void()> executeBoundaryHandlingOnLevel(uint_t level);
+
+   std::shared_ptr< StructuredBlockForest > sbfs_;
+   uint_t maxLevel_;                          // finest level (== sbfs_->getDepth())
+   std::vector<std::vector<Block *>> blocks_; // blocks_[level] = all local blocks on that level
+
+   const BlockDataID pdfFieldId_;
+   std::shared_ptr< PackInfo > pdfFieldPackInfo_;
+   std::shared_ptr< CommScheme > commScheme_;
+
+   SweepCollection_T & sweepCollection_;
+   BoundaryCollection_T & boundaryCollection_;
+};
+
+} // namespace lbm_generated
+} // namespace walberla
+
+#include "lbm_generated/refinement/BasicRecursiveTimeStep.impl.h"
diff --git a/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h b/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..7e6d9b5944e0e526287fba475c42e07f70695e7d
--- /dev/null
+++ b/src/lbm_generated/refinement/BasicRecursiveTimeStep.impl.h
@@ -0,0 +1,266 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BasicRecursiveTimeStep.impl.h
+//! \author Frederik Hennig <frederik.hennig@fau.de>
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "BasicRecursiveTimeStep.h"
+
+namespace walberla {
+namespace lbm_generated {
+
+/**
+ * Performs one recursive time step on \p level; each coarse step recursively
+ * triggers two subcycles on the next finer level. The stage ordering
+ * (collide -> descend -> communicate -> boundaries/coalescence -> fine-to-coarse)
+ * is intentional — do not reorder.
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::timestep(uint_t level)
+{
+   // 1.1 Collision
+   for(auto b: blocks_[level]){
+      sweepCollection_.streamCollide(b);
+   }
+
+   // 1.2 Recursive Descent
+   if(level < maxLevel_){
+      timestep(level + 1);
+   }
+
+   // 1.3 Coarse to Fine Communication, receiving end
+   if(level != 0){
+      commScheme_->communicateCoarseToFine(level);
+   }
+
+   // 1.4 Equal-Level Communication
+   commScheme_->communicateEqualLevel(level);
+
+   // 1.5 Boundary Handling and Coalescence Preparation
+   for(auto b : blocks_[level]){
+      boundaryCollection_(b);
+      if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b);
+   }
+
+   // 1.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel_){
+      commScheme_->communicateFineToCoarse(level + 1);
+   }
+
+   // Stop here if on coarsest level.
+   // Otherwise, continue to second subcycle.
+   if(level == 0) return;
+
+   // 2.1 Collision and Ghost-Layer Propagation
+   for(auto b: blocks_[level]){
+      ghostLayerPropagation(b);  // GL-Propagation first without swapping arrays...
+      sweepCollection_.streamCollide(b);                // then Stream-Collide on interior, and swap arrays
+   }
+
+   // 2.2 Recursive Descent
+   if(level < maxLevel_){
+      timestep(level + 1);
+   }
+
+   // 2.4 Equal-Level Communication
+   commScheme_->communicateEqualLevel(level);
+
+   // 2.5 Boundary Handling and Coalescence Preparation
+   for(auto b : blocks_[level]){
+      boundaryCollection_(b);
+      if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b);
+   }
+
+   // 2.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel_){
+      commScheme_->communicateFineToCoarse(level + 1);
+   }
+}
+
+
+/**
+ * Registers the same stage sequence as timestep(), but as individual named
+ * timeloop functions, so the SweepTimeloop drives the refinement cycle and
+ * each stage shows up separately in timing reports.
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::addRefinementToTimeLoop(timeloop::SweepTimeloop & timeloop, uint_t level)
+{
+   // 1.1 Collision
+   timeloop.addFuncBeforeTimeStep(executeStreamCollideOnLevel(level), "Refinement Cycle: streamCollide on level " + std::to_string(level));
+
+   // 1.2 Recursive Descent
+   if(level < maxLevel_){
+      addRefinementToTimeLoop(timeloop, level + 1);
+   }
+
+   // 1.3 Coarse to Fine Communication, receiving end
+   if(level != 0){
+      timeloop.addFuncBeforeTimeStep(commScheme_->communicateCoarseToFineFunctor(level), "Refinement Cycle: communicate coarse to fine on level " + std::to_string(level));
+   }
+
+   // 1.4 Equal-Level Communication
+   timeloop.addFuncBeforeTimeStep(commScheme_->communicateEqualLevelFunctor(level), "Refinement Cycle: communicate equal level on level " + std::to_string(level));
+
+
+   // 1.5 Boundary Handling and Coalescence Preparation
+   timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+
+   // 1.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel_){
+      timeloop.addFuncBeforeTimeStep(commScheme_->communicateFineToCoarseFunctor(level + 1), "Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
+   }
+
+   // Stop here if on coarsest level.
+   // Otherwise, continue to second subcycle.
+   if(level == 0) return;
+
+   // 2.1 Collision and Ghost-Layer Propagation
+   timeloop.addFuncBeforeTimeStep(executeStreamCollideOnLevel(level, true), "Refinement Cycle: streamCollide with ghost layer propagation on level " + std::to_string(level));
+
+   // 2.2 Recursive Descent
+   if(level < maxLevel_)
+      addRefinementToTimeLoop(timeloop, level + 1);
+
+
+   // 2.4 Equal-Level Communication
+   timeloop.addFuncBeforeTimeStep(commScheme_->communicateEqualLevelFunctor(level), "Refinement Cycle: communicate equal level on level " + std::to_string(level));
+
+   // 2.5 Boundary Handling and Coalescence Preparation
+   timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+
+   // 2.6 Fine to Coarse Communication, receiving end
+   if(level < maxLevel_)
+      timeloop.addFuncBeforeTimeStep(commScheme_->communicateFineToCoarseFunctor(level + 1), "Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
+
+}
+
+
+/**
+ * Builds a functor running stream-collide on all blocks of the given level,
+ * optionally preceded (per block) by ghost-layer propagation for the second
+ * subcycle of the refinement algorithm.
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+std::function<void()> BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation)
+{
+   return [this, level, withGhostLayerPropagation]()
+   {
+      for (auto block : blocks_[level])
+      {
+         // GL-propagation must run before the stream-collide of the same block.
+         if (withGhostLayerPropagation) ghostLayerPropagation(block);
+         sweepCollection_.streamCollide(block);
+      }
+   };
+}
+
+
+/**
+ * Builds a functor applying boundary handling on all blocks of the given level
+ * and, on every level except the finest, preparing PDF coalescence for the
+ * subsequent fine-to-coarse communication.
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+std::function<void()>  BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeBoundaryHandlingOnLevel(uint_t level)
+{
+   return [this, level]() {
+      const bool prepareCoalescence = (level != maxLevel_);
+      for (auto block : blocks_[level])
+      {
+         boundaryCollection_(block);
+         if (prepareCoalescence) pdfFieldPackInfo_->prepareCoalescence(block);
+      }
+   };
+}
+
+
+/**
+ * Runs the stream step (without advancing the timestep counter) on the ghost
+ * regions of \p block for every non-center communication direction whose
+ * neighborhood section does NOT contain finer blocks, i.e. ghost layers
+ * shadowing coarse neighbors or the domain boundary.
+ */
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+void BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T >::ghostLayerPropagation(
+   Block * block)
+{
+   auto pdfField = block->getData<PdfField_T>(pdfFieldId_);
+
+   for(auto it = CommunicationStencil::beginNoCenter(); it != CommunicationStencil::end(); ++it){
+      uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*it);
+      // Propagate on ghost layers shadowing coarse or no blocks
+      if(!block->neighborhoodSectionHasSmallerBlocks(nSecIdx)){
+         CellInterval ci;
+         // Single ghost layer in direction *it is streamed into the interior.
+         pdfField->getGhostRegion(*it, ci, 1);
+         sweepCollection_.streamOnlyNoAdvancementCellInterval(block, ci);
+      }
+   }
+}
+
+// Refinement Timestep from post collision state:
+//template< typename PdfField_T, typename LbSweep_T >
+//void BasicRecursiveTimeStep< PdfField_T, LbSweep_T >::timestep(uint_t level)
+//{
+//   std::vector<Block *> blocks;
+//   sbfs_->getBlocks(blocks, level);
+//
+//   uint_t maxLevel = sbfs_->getDepth();
+//
+//   // 1.1 Equal-Level Communication
+//   commScheme_->communicateEqualLevel(level);
+//
+//   // 1.2 Coarse to Fine Communication
+//   if(level < maxLevel){
+//      commScheme_->communicateCoarseToFine(level + 1);
+//   }
+//
+//   // 1.3 Boundary Handling and
+//   // 1.4 Prepare Coalescence (which happens during the recursive descent)
+//   for(auto b : blocks){
+//      boundaryFunctor_(b);
+//      if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+//   }
+//
+//   // 1.5 Recursive Descent
+//   if(level < maxLevel){
+//      timestep(level + 1);
+//   }
+//
+//   // 1.6 First Collision and ghost-layer propagation
+//   for(auto b: blocks){
+//      if(level != 0) ghostLayerPropagation(b);  // GL-Propagation first without swapping arrays...
+//      sweepCollection_.streamCollide(b);                // then Stream-Collide on interior, and swap arrays
+//   }
+//
+//   // Stop here if on coarsest level.
+//   // Otherwise, continue to second subcycle.
+//   if(level == 0) return;
+//
+//   // 2.1 Equal-Level Communication
+//   commScheme_->communicateEqualLevel(level);
+//
+//   // 2.2 Coarse to Fine Communication
+//   if(level < maxLevel){
+//      commScheme_->communicateCoarseToFine(level + 1);
+//   }
+//
+//   // 2.3 Boundary Handling and
+//   // 2.4 Prepare Coalescence (which happens during the recursive descent)
+//   for(auto b : blocks){
+//      boundaryFunctor_(b);
+//      if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+//   }
+//
+//   // 2.5 Recursive Descent
+//   if(level < maxLevel){
+//      timestep(level + 1);
+//   }
+//
+//   // 2.6 Fine to Coarse Communication
+//   commScheme_->communicateFineToCoarse(level);
+//
+//   // 2.7 Second Collision
+//   for(auto b: blocks){
+//      sweepCollection_.streamCollide(b);
+//   }
+//}
+
+} // namespace lbm_generated
+} // namespace walberla
diff --git a/src/lbm_generated/refinement/CMakeLists.txt b/src/lbm_generated/refinement/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..216b4a2683ebc426c8f30a5135d5c07db3409640
--- /dev/null
+++ b/src/lbm_generated/refinement/CMakeLists.txt
@@ -0,0 +1,6 @@
+target_sources( lbm_generated
+    PRIVATE
+    BasicRecursiveTimeStep.h
+    BasicRecursiveTimeStep.impl.h
+    RefinementScaling.h
+    )
diff --git a/src/lbm_generated/refinement/RefinementScaling.h b/src/lbm_generated/refinement/RefinementScaling.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8015946a4816e4c0e7c54ea43d2f310755aaec3
--- /dev/null
+++ b/src/lbm_generated/refinement/RefinementScaling.h
@@ -0,0 +1,63 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file RefinementScaling.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/BlockDataHandling.h"
+
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+namespace walberla
+{
+namespace lbm_generated
+{
+
+/**
+ * Block data handling that stores, per block, a refinement-level-dependent
+ * rescaling of a relaxation parameter: initialize() reads the block's level
+ * and returns the parameter transformed by the level scale factor 2^level.
+ */
+class DefaultRefinementScaling : public blockforest::AlwaysInitializeBlockDataHandling< real_t >
+{
+ public:
+   /// \param blocks    weak reference to the block storage; checked for liveness on every initialize()
+   /// \param parameter coarse-level parameter value to be rescaled per refinement level
+   DefaultRefinementScaling(const weak_ptr< StructuredBlockStorage >& blocks, const real_t parameter)
+      : blocks_(blocks), parameter_(parameter) {}
+
+   real_t* initialize(IBlock* const block) override
+   {
+      WALBERLA_ASSERT_NOT_NULLPTR(block)
+      auto blocks = blocks_.lock();
+      WALBERLA_CHECK_NOT_NULLPTR(blocks)
+
+      level_ = block->getBlockStorage().getLevel(*block);
+
+      // 2^level: refinement factor of this block's grid relative to the coarsest level.
+      const real_t level_scale_factor = real_c(uint_t(1) << level_);
+      const real_t one                = real_c(1.0);
+      const real_t half               = real_c(0.5);
+
+      // Rescale the parameter for this level; caller takes ownership of the returned value.
+      return new real_t(parameter_ / (level_scale_factor * (-parameter_ * half + one) + parameter_ * half));
+   }
+
+   bool operator==(const DefaultRefinementScaling& other) const { return level_ == other.level_; }
+
+ private:
+   const weak_ptr< StructuredBlockStorage > blocks_;
+   const real_t parameter_;
+
+   // Fix: brace-initialize; previously uninitialized until initialize() ran, so
+   // operator== could read an indeterminate value.
+   uint_t level_{ 0 };
+};
+
+} // namespace lbm_generated
+} // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/CMakeLists.txt b/src/lbm_generated/storage_specification/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..83d211632ca9366f1ac5f719a22d217f7c176061
--- /dev/null
+++ b/src/lbm_generated/storage_specification/CMakeLists.txt
@@ -0,0 +1,7 @@
+target_sources( lbm_generated
+        PRIVATE
+        D3Q19StorageSpecification.h
+        D3Q19StorageSpecification.cpp
+        D3Q27StorageSpecification.h
+        D3Q27StorageSpecification.cpp
+        )
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f36797eecca7282cf1f615492ac54cee38be871f
--- /dev/null
+++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
@@ -0,0 +1,1939 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q19StorageSpecification.cpp
+//! \author lbmpy
+//======================================================================================================================
+
+#include "D3Q19StorageSpecification.h"
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+/*************************************************************************************
+ *                                Kernel Definitions
+*************************************************************************************/
+namespace internal_d3q19storagespecification_pack_ALL { // generated by lbmpy: packs all 19 PDF values of every cell in the interval into a dense send buffer
+static FUNC_PREFIX void d3q19storagespecification_pack_ALL(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // buffer layout: linearized [x][y][z] cells, 19 consecutive doubles (q index) per cell
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0; // per-direction base pointers for this x slab; _3k = PDF component k
+      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30; // row pointers: slab base advanced to row ctr_1
+         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
+         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
+         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
+         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2]; // 19 consecutive buffer slots per cell: slot k holds PDF component k
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_ALL { // generated by lbmpy: inverse of pack_ALL — scatters 19 buffered PDF values per cell back into the field
+static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) // buffer layout must match pack_ALL: 19 consecutive doubles per cell
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0; // per-direction base pointers for this x slab; _3k = PDF component k
+      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30; // row pointers: slab base advanced to row ctr_1
+         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
+         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
+         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
+         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2]; // buffer slot k of each cell is written to PDF component k
+            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 1];
+            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 2];
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 3];
+            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 4];
+            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 5];
+            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 6];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 7];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 8];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 9];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 10];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 11];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 12];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 13];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 14];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 15];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 16];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 17];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 18];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_ALL { // generated by lbmpy: copies all 19 PDF components cell-by-cell between two fields (local, no buffer)
+static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // src and dst may have different strides; iteration extent is taken from dst
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0; // paired dst/src base pointers per PDF component for this x slab
+      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
+      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30; // row pointers: slab base advanced to row ctr_1
+         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
+         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
+         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
+         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
+         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
+         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
+         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
+         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2]; // straight component-wise copy, src strides on the read side
+            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q19storagespecification_pack_TE { // generated by lbmpy: packs only the single PDF component streaming across the TE boundary
+static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // buffer holds one double per cell
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; // component 14 (TE direction in this stencil ordering)
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_SW { // generated by lbmpy: packs only the single PDF component streaming across the SW boundary
+static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // buffer holds one double per cell
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; // component 9 (SW direction in this stencil ordering)
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_T { // generated by lbmpy: packs the five PDF components (5, 11-14) crossing the T boundary, 5 doubles per cell
+static FUNC_PREFIX void d3q19storagespecification_pack_T(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; // base pointers for the five packed components of this x slab
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; // 5 consecutive buffer slots per cell, in component order 5, 11, 12, 13, 14
+            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_BS {
+// Packs PDF component 16 (BS communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 16*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_TN {
+// Packs PDF component 11 (TN communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 11*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_BW {
+// Packs PDF component 17 (BW communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 17*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_N {
+// Packs the five PDF components {1, 7, 8, 11, 15} of the N communication
+// direction, interleaved per cell (buffer stride 5). A running buffer index
+// plus a component table replaces the original 5*(...)+q addressing and the
+// per-component hoisted pointers; write order is identical.
+static FUNC_PREFIX void d3q19storagespecification_pack_N(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t comps[5] = {1, 7, 8, 11, 15};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_src_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_buffer[bufIdx++] = _data_pdfs_src[cellOff + comps[q]*_stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_E {
+// Packs the five PDF components {4, 8, 10, 14, 18} of the E communication
+// direction, interleaved per cell (buffer stride 5); running index + table
+// form of the generated kernel, same write order.
+static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t comps[5] = {4, 8, 10, 14, 18};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_src_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_buffer[bufIdx++] = _data_pdfs_src[cellOff + comps[q]*_stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_NW {
+// Packs PDF component 7 (NW communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 7*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_NE {
+// Packs PDF component 8 (NE communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 8*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_TW {
+// Packs PDF component 13 (TW communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 13*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_BE {
+// Packs PDF component 18 (BE communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 18*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_W {
+// Packs the five PDF components {3, 7, 9, 13, 17} of the W communication
+// direction, interleaved per cell (buffer stride 5); running index + table
+// form of the generated kernel, same write order.
+static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t comps[5] = {3, 7, 9, 13, 17};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_src_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_buffer[bufIdx++] = _data_pdfs_src[cellOff + comps[q]*_stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_S {
+// Packs the five PDF components {2, 9, 10, 12, 16} of the S communication
+// direction, interleaved per cell (buffer stride 5); running index + table
+// form of the generated kernel, same write order.
+static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t comps[5] = {2, 9, 10, 12, 16};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_src_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_buffer[bufIdx++] = _data_pdfs_src[cellOff + comps[q]*_stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_SE {
+// Packs PDF component 10 (SE communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 10*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_TS {
+// Packs PDF component 12 (TS communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 12*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_BN {
+// Packs PDF component 15 (BN communication direction) into the contiguous
+// send buffer; a running index replaces the explicit buffer addressing.
+static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_src_0*x + 15*_stride_pdfs_src_3;
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            _data_buffer[bufIdx++] = _data_pdfs_src[offXY + _stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_pack_B {
+// Packs the five PDF components {6, 15, 16, 17, 18} of the B communication
+// direction, interleaved per cell (buffer stride 5); running index + table
+// form of the generated kernel, same write order.
+static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   const int64_t comps[5] = {6, 15, 16, 17, 18};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_src_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_src_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_src_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_src_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_buffer[bufIdx++] = _data_pdfs_src[cellOff + comps[q]*_stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_BW {
+// Unpacks PDF component 14 (BW communication direction) from the contiguous
+// receive buffer into the destination field, consuming the buffer in the
+// same x-slowest / z-fastest order the matching pack kernel produced it.
+static FUNC_PREFIX void d3q19storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_dst_0*x + 14*_stride_pdfs_dst_3;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_dst_1*y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            _data_pdfs_dst[offXY + _stride_pdfs_dst_2*z] = _data_buffer[bufIdx++];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_N {
+// Unpacks the five PDF components {2, 9, 10, 12, 16} received from the N
+// neighbor; the buffer is read with a running index in the same interleaved
+// per-cell (stride 5) order the sender packed it.
+static FUNC_PREFIX void d3q19storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t comps[5] = {2, 9, 10, 12, 16};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_dst_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_pdfs_dst[cellOff + comps[q]*_stride_pdfs_dst_3] = _data_buffer[bufIdx++];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_SE {
+// Unpacks PDF component 7 (SE communication direction) from the contiguous
+// receive buffer into the destination field, same traversal order as pack.
+static FUNC_PREFIX void d3q19storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_dst_0*x + 7*_stride_pdfs_dst_3;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_dst_1*y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            _data_pdfs_dst[offXY + _stride_pdfs_dst_2*z] = _data_buffer[bufIdx++];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_TE {
+// Unpacks PDF component 17 (TE communication direction) from the contiguous
+// receive buffer into the destination field, same traversal order as pack.
+static FUNC_PREFIX void d3q19storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t offX = _stride_pdfs_dst_0*x + 17*_stride_pdfs_dst_3;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t offXY = offX + _stride_pdfs_dst_1*y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            _data_pdfs_dst[offXY + _stride_pdfs_dst_2*z] = _data_buffer[bufIdx++];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_T {
+// Unpacks the five PDF components {6, 15, 16, 17, 18} received from the T
+// neighbor; the buffer is read with a running index in the same interleaved
+// per-cell (stride 5) order the sender packed it.
+static FUNC_PREFIX void d3q19storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   const int64_t comps[5] = {6, 15, 16, 17, 18};
+   int64_t bufIdx = 0;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t rowOff = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            const int64_t cellOff = rowOff + _stride_pdfs_dst_2*z;
+            for (int64_t q = 0; q < 5; ++q)
+            {
+               _data_pdfs_dst[cellOff + comps[q]*_stride_pdfs_dst_3] = _data_buffer[bufIdx++];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_TS {
+// Auto-generated unpack kernel (direction suffix TS): copies one value per
+// cell from the contiguous receive buffer into PDF component 15 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_BE {
+// Auto-generated unpack kernel (direction suffix BE): copies one value per
+// cell from the contiguous receive buffer into PDF component 13 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_NW {
+// Auto-generated unpack kernel (direction suffix NW): copies one value per
+// cell from the contiguous receive buffer into PDF component 10 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_NE {
+// Auto-generated unpack kernel (direction suffix NE): copies one value per
+// cell from the contiguous receive buffer into PDF component 9 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_BS {
+// Auto-generated unpack kernel (direction suffix BS): copies one value per
+// cell from the contiguous receive buffer into PDF component 11 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_E {
+// Auto-generated unpack kernel (direction suffix E): copies five interleaved
+// values per cell from the receive buffer (layout: 5 doubles per cell, z
+// fastest) into PDF components 3, 7, 9, 13 and 17 of the destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Per-component base pointers for this x-slice, hoisted out of the loops.
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 5*cell+0 .. 5*cell+4 hold the five components in order.
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_S {
+// Auto-generated unpack kernel (direction suffix S): copies five interleaved
+// values per cell from the receive buffer (layout: 5 doubles per cell, z
+// fastest) into PDF components 1, 7, 8, 11 and 15 of the destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Per-component base pointers; component 1 uses the plain f-stride (1*).
+      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 5*cell+0 .. 5*cell+4 hold the five components in order.
+            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_W {
+// Auto-generated unpack kernel (direction suffix W): copies five interleaved
+// values per cell from the receive buffer (layout: 5 doubles per cell, z
+// fastest) into PDF components 4, 8, 10, 14 and 18 of the destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Per-component base pointers for this x-slice, hoisted out of the loops.
+      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 5*cell+0 .. 5*cell+4 hold the five components in order.
+            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_SW {
+// Auto-generated unpack kernel (direction suffix SW): copies one value per
+// cell from the contiguous receive buffer into PDF component 8 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_B {
+// Auto-generated unpack kernel (direction suffix B): copies five interleaved
+// values per cell from the receive buffer (layout: 5 doubles per cell, z
+// fastest) into PDF components 5, 11, 12, 13 and 14 of the destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Per-component base pointers for this x-slice, hoisted out of the loops.
+      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 5*cell+0 .. 5*cell+4 hold the five components in order.
+            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_TN {
+// Auto-generated unpack kernel (direction suffix TN): copies one value per
+// cell from the contiguous receive buffer into PDF component 16 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_BN {
+// Auto-generated unpack kernel (direction suffix BN): copies one value per
+// cell from the contiguous receive buffer into PDF component 12 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_unpack_TW {
+// Auto-generated unpack kernel (direction suffix TW): copies one value per
+// cell from the contiguous receive buffer into PDF component 18 of the
+// strided destination field.
+static FUNC_PREFIX void d3q19storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_NE {
+// Auto-generated local-copy kernel (direction suffix NE): copies PDF
+// component 8 cell-by-cell from the source field to the destination field;
+// src and dst carry independent stride sets.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_TS {
+// Auto-generated local-copy kernel (direction suffix TS): copies PDF
+// component 12 cell-by-cell from the source field to the destination field;
+// src and dst carry independent stride sets.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_BE {
+// Auto-generated local-copy kernel (direction suffix BE): copies PDF
+// component 18 cell-by-cell from the source field to the destination field;
+// src and dst carry independent stride sets.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_BS {
+// Auto-generated local-copy kernel (direction suffix BS): copies PDF
+// component 16 cell-by-cell from the source field to the destination field;
+// src and dst carry independent stride sets.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_BW {
+// Auto-generated local-copy kernel (direction suffix BW): copies PDF
+// component 17 cell-by-cell from the source field to the destination field;
+// src and dst carry independent stride sets.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_T {
+// Auto-generated local-copy kernel (direction suffix T): copies PDF
+// components 5, 11, 12, 13 and 14 cell-by-cell from the source field to the
+// destination field; src and dst carry independent stride sets.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Paired dst/src base pointers per component, hoisted out of the loops.
+      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_TN {
+// Local (same-process) ghost-layer copy for the TN communication direction:
+// transfers PDF component 11 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 11.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 11*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 11*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_W {
+// Local ghost-layer copy for the W communication direction: transfers the
+// five PDF components 3, 7, 9, 13 and 17 for every cell of the interval.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // PDF component indices exchanged in this direction.
+   const int64_t comps[5] = {3, 7, 9, 13, 17};
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+         {
+            // Linear offset of the current cell (component 0) in each field.
+            const int64_t dstCell = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + _stride_pdfs_dst_2*i2;
+            const int64_t srcCell = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + _stride_pdfs_src_2*i2;
+            for (const int64_t f : comps)
+               _data_pdfs_dst[dstCell + f*_stride_pdfs_dst_3] = _data_pdfs_src[srcCell + f*_stride_pdfs_src_3];
+         }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_E {
+// Local ghost-layer copy for the E communication direction: transfers the
+// five PDF components 4, 8, 10, 14 and 18 for every cell of the interval.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // PDF component indices exchanged in this direction.
+   const int64_t comps[5] = {4, 8, 10, 14, 18};
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+         {
+            // Linear offset of the current cell (component 0) in each field.
+            const int64_t dstCell = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + _stride_pdfs_dst_2*i2;
+            const int64_t srcCell = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + _stride_pdfs_src_2*i2;
+            for (const int64_t f : comps)
+               _data_pdfs_dst[dstCell + f*_stride_pdfs_dst_3] = _data_pdfs_src[srcCell + f*_stride_pdfs_src_3];
+         }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_TW {
+// Local ghost-layer copy for the TW communication direction:
+// transfers PDF component 13 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 13.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 13*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 13*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_SW {
+// Local ghost-layer copy for the SW communication direction:
+// transfers PDF component 9 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 9.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 9*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 9*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_NW {
+// Local ghost-layer copy for the NW communication direction:
+// transfers PDF component 7 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 7.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 7*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 7*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_BN {
+// Local ghost-layer copy for the BN communication direction:
+// transfers PDF component 15 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 15.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 15*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 15*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_TE {
+// Local ghost-layer copy for the TE communication direction:
+// transfers PDF component 14 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 14.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 14*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 14*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_B {
+// Local ghost-layer copy for the B communication direction: transfers the
+// five PDF components 6, 15, 16, 17 and 18 for every cell of the interval.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // PDF component indices exchanged in this direction.
+   const int64_t comps[5] = {6, 15, 16, 17, 18};
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+         {
+            // Linear offset of the current cell (component 0) in each field.
+            const int64_t dstCell = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + _stride_pdfs_dst_2*i2;
+            const int64_t srcCell = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + _stride_pdfs_src_2*i2;
+            for (const int64_t f : comps)
+               _data_pdfs_dst[dstCell + f*_stride_pdfs_dst_3] = _data_pdfs_src[srcCell + f*_stride_pdfs_src_3];
+         }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_N {
+// Local ghost-layer copy for the N communication direction: transfers the
+// five PDF components 1, 7, 8, 11 and 15 for every cell of the interval.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // PDF component indices exchanged in this direction.
+   const int64_t comps[5] = {1, 7, 8, 11, 15};
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+         {
+            // Linear offset of the current cell (component 0) in each field.
+            const int64_t dstCell = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + _stride_pdfs_dst_2*i2;
+            const int64_t srcCell = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + _stride_pdfs_src_2*i2;
+            for (const int64_t f : comps)
+               _data_pdfs_dst[dstCell + f*_stride_pdfs_dst_3] = _data_pdfs_src[srcCell + f*_stride_pdfs_src_3];
+         }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_S {
+// Local ghost-layer copy for the S communication direction: transfers the
+// five PDF components 2, 9, 10, 12 and 16 for every cell of the interval.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // PDF component indices exchanged in this direction.
+   const int64_t comps[5] = {2, 9, 10, 12, 16};
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+         {
+            // Linear offset of the current cell (component 0) in each field.
+            const int64_t dstCell = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + _stride_pdfs_dst_2*i2;
+            const int64_t srcCell = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + _stride_pdfs_src_2*i2;
+            for (const int64_t f : comps)
+               _data_pdfs_dst[dstCell + f*_stride_pdfs_dst_3] = _data_pdfs_src[srcCell + f*_stride_pdfs_src_3];
+         }
+}
+}
+
+namespace internal_d3q19storagespecification_localCopy_SE {
+// Local ghost-layer copy for the SE communication direction:
+// transfers PDF component 10 for every cell of the interval from src to dst.
+static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t i0 = 0; i0 < _size_pdfs_dst_0; i0 += 1)
+      for (int64_t i1 = 0; i1 < _size_pdfs_dst_1; i1 += 1)
+      {
+         // Linear offset of the first cell of this (i0, i1) row, component 10.
+         const int64_t dstRow = _stride_pdfs_dst_0*i0 + _stride_pdfs_dst_1*i1 + 10*_stride_pdfs_dst_3;
+         const int64_t srcRow = _stride_pdfs_src_0*i0 + _stride_pdfs_src_1*i1 + 10*_stride_pdfs_src_3;
+         for (int64_t i2 = 0; i2 < _size_pdfs_dst_2; i2 += 1)
+            _data_pdfs_dst[dstRow + _stride_pdfs_dst_2*i2] = _data_pdfs_src[srcRow + _stride_pdfs_src_2*i2];
+      }
+}
+}
+
+
+
+
+/*************************************************************************************
+ *                                 Kernel Wrappers
+*************************************************************************************/
+
+namespace walberla {
+namespace lbm {
+
+   void D3Q19StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const
+   {
+      // Packs every PDF component of all cells in `ci` into `outBuffer` by
+      // delegating to the generated pack_ALL kernel. The byte buffer is
+      // reinterpreted as a contiguous double array; the caller is responsible
+      // for sizing it to hold ci-volume * number-of-components doubles.
+      double * buffer = reinterpret_cast<double*>(outBuffer);
+      double * RESTRICT  _data_buffer = buffer;
+      // The interval may reach into the ghost layers, but not beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      // Base pointer at the interval's minimum corner (PDF component 0).
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      // Interval extents per axis; the field (incl. ghost layers) must be
+      // large enough to cover them.
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_src_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_src_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_src_2 = int64_t(int64_c(ci.zSize()) + 0);
+      // Element strides of the source field along x, y, z and the PDF axis.
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      internal_d3q19storagespecification_pack_ALL::d3q19storagespecification_pack_ALL(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+   }
+
+
<br>
+   void D3Q19StorageSpecification::PackKernels::unpackAll(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer) const
+   {
+      // Inverse of packAll: reads a densely packed double array from
+      // `inBuffer` and writes every PDF component of all cells in `ci` into
+      // the destination field via the generated unpack_ALL kernel.
+      double * buffer = reinterpret_cast<double*>(inBuffer);
+      double * RESTRICT const _data_buffer = buffer;
+      // The interval may reach into the ghost layers, but not beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      // Base pointer at the interval's minimum corner (PDF component 0).
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      // Interval extents per axis; the field (incl. ghost layers) must be
+      // large enough to cover them.
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      // Element strides of the destination field along x, y, z and the PDF axis.
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      internal_d3q19storagespecification_unpack_ALL::d3q19storagespecification_unpack_ALL(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+   }
+
+
+   // Copies all 19 PDF values per cell from `srcInterval` of `pdfs_src` to `dstInterval`
+   // of `pdfs_dst` without going through an MPI buffer (both blocks live on this process).
+   // The two intervals must have identical extents; the loop bounds are taken from the
+   // destination interval, while source and destination use their own field strides.
+   void D3Q19StorageSpecification::PackKernels::localCopyAll(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval) const
+   {
+      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+      // Both intervals may reach into the ghost layers, but must not extend beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(dstInterval.xMin(), dstInterval.yMin(), dstInterval.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(srcInterval.xMin(), srcInterval.yMin(), srcInterval.zMin(), 0);
+      // Loop bounds come from the destination interval (equal to the source extents, see asserts above).
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(dstInterval.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(dstInterval.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(dstInterval.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(dstInterval.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(dstInterval.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(dstInterval.zSize()) + 0);
+      // Strides (in elements) for x, y, z, f of each field; src and dst may have different layouts.
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      internal_d3q19storagespecification_localCopy_ALL::d3q19storagespecification_localCopy_ALL(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+   }
+
+   // Packs only the PDFs relevant for communication in direction `dir` (see the header
+   // documentation of packDirection and the `sizes` table: 5 PDFs per face direction,
+   // 1 per edge direction for D3Q19). The common preamble computes sizes/strides once;
+   // the switch then dispatches to the per-direction generated kernel.
+   void D3Q19StorageSpecification::PackKernels::packDirection(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir) const
+   {
+      // The communication buffer is raw bytes; the PDFs written into it are doubles.
+      double * buffer = reinterpret_cast<double*>(outBuffer);
+      double * RESTRICT  _data_buffer = buffer;
+      // The interval may reach into the ghost layers, but must not extend beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_src_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_src_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_src_2 = int64_t(int64_c(ci.zSize()) + 0);
+      // Field strides (in elements) for x, y, z and f of the source field.
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      // Dispatch to the generated kernel for the requested direction. Directions not
+      // handled here (e.g. the center direction C) pack nothing, matching sizes[dir] == 0.
+      switch (dir) {
+          case stencil::N : {
+              internal_d3q19storagespecification_pack_N::d3q19storagespecification_pack_N(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              internal_d3q19storagespecification_pack_S::d3q19storagespecification_pack_S(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              internal_d3q19storagespecification_pack_W::d3q19storagespecification_pack_W(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              internal_d3q19storagespecification_pack_E::d3q19storagespecification_pack_E(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : {
+              internal_d3q19storagespecification_pack_T::d3q19storagespecification_pack_T(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::B : {
+              internal_d3q19storagespecification_pack_B::d3q19storagespecification_pack_B(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NW : {
+              internal_d3q19storagespecification_pack_NW::d3q19storagespecification_pack_NW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NE : {
+              internal_d3q19storagespecification_pack_NE::d3q19storagespecification_pack_NE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SW : {
+              internal_d3q19storagespecification_pack_SW::d3q19storagespecification_pack_SW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SE : {
+              internal_d3q19storagespecification_pack_SE::d3q19storagespecification_pack_SE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TN : {
+              internal_d3q19storagespecification_pack_TN::d3q19storagespecification_pack_TN(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TS : {
+              internal_d3q19storagespecification_pack_TS::d3q19storagespecification_pack_TS(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TW : {
+              internal_d3q19storagespecification_pack_TW::d3q19storagespecification_pack_TW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TE : {
+              internal_d3q19storagespecification_pack_TE::d3q19storagespecification_pack_TE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BN : {
+              internal_d3q19storagespecification_pack_BN::d3q19storagespecification_pack_BN(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BS : {
+              internal_d3q19storagespecification_pack_BS::d3q19storagespecification_pack_BS(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BW : {
+              internal_d3q19storagespecification_pack_BW::d3q19storagespecification_pack_BW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BE : {
+              internal_d3q19storagespecification_pack_BE::d3q19storagespecification_pack_BE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   // Unpacks the PDFs relevant for communication in direction `dir` from the received
+   // byte buffer into the cell interval `ci` of `pdfs_dst`. Counterpart of packDirection();
+   // the per-direction buffer layout is defined by the matching pack kernel, and the
+   // number of values per cell is sizes[dir] (see header).
+   void D3Q19StorageSpecification::PackKernels::unpackDirection(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir) const
+   {
+      // The communication buffer is raw bytes; the PDFs stored in it are doubles.
+      double * buffer = reinterpret_cast<double*>(inBuffer);
+      double * RESTRICT const _data_buffer = buffer;
+      // The interval may reach into the ghost layers, but must not extend beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      // Field strides (in elements) for x, y, z and f of the destination field.
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      // Dispatch to the generated kernel for the requested direction; unhandled
+      // directions unpack nothing.
+      switch (dir) {
+          case stencil::N : {
+              internal_d3q19storagespecification_unpack_N::d3q19storagespecification_unpack_N(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::S : {
+              internal_d3q19storagespecification_unpack_S::d3q19storagespecification_unpack_S(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::W : {
+              internal_d3q19storagespecification_unpack_W::d3q19storagespecification_unpack_W(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::E : {
+              internal_d3q19storagespecification_unpack_E::d3q19storagespecification_unpack_E(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::T : {
+              internal_d3q19storagespecification_unpack_T::d3q19storagespecification_unpack_T(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::B : {
+              internal_d3q19storagespecification_unpack_B::d3q19storagespecification_unpack_B(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NW : {
+              internal_d3q19storagespecification_unpack_NW::d3q19storagespecification_unpack_NW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NE : {
+              internal_d3q19storagespecification_unpack_NE::d3q19storagespecification_unpack_NE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SW : {
+              internal_d3q19storagespecification_unpack_SW::d3q19storagespecification_unpack_SW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SE : {
+              internal_d3q19storagespecification_unpack_SE::d3q19storagespecification_unpack_SE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TN : {
+              internal_d3q19storagespecification_unpack_TN::d3q19storagespecification_unpack_TN(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TS : {
+              internal_d3q19storagespecification_unpack_TS::d3q19storagespecification_unpack_TS(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TW : {
+              internal_d3q19storagespecification_unpack_TW::d3q19storagespecification_unpack_TW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TE : {
+              internal_d3q19storagespecification_unpack_TE::d3q19storagespecification_unpack_TE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BN : {
+              internal_d3q19storagespecification_unpack_BN::d3q19storagespecification_unpack_BN(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BS : {
+              internal_d3q19storagespecification_unpack_BS::d3q19storagespecification_unpack_BS(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BW : {
+              internal_d3q19storagespecification_unpack_BW::d3q19storagespecification_unpack_BW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BE : {
+              internal_d3q19storagespecification_unpack_BE::d3q19storagespecification_unpack_BE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   // Copies the PDFs relevant for communication in direction `dir` from `srcInterval` of
+   // `pdfs_src` to `dstInterval` of `pdfs_dst` on the same process, without an MPI buffer.
+   // The intervals must have identical extents; loop bounds come from the destination
+   // interval, and each field uses its own strides (layouts may differ).
+   void D3Q19StorageSpecification::PackKernels::localCopyDirection(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, stencil::Direction dir) const
+   {
+      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+      // Both intervals may reach into the ghost layers, but must not extend beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(dstInterval.xMin(), dstInterval.yMin(), dstInterval.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(srcInterval.xMin(), srcInterval.yMin(), srcInterval.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(dstInterval.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(dstInterval.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(dstInterval.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(dstInterval.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(dstInterval.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(dstInterval.zSize()) + 0);
+      // Strides (in elements) for x, y, z, f of each field.
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      // Dispatch to the generated kernel for the requested direction; unhandled
+      // directions copy nothing.
+      switch (dir) {
+          case stencil::N : {
+              internal_d3q19storagespecification_localCopy_N::d3q19storagespecification_localCopy_N(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              internal_d3q19storagespecification_localCopy_S::d3q19storagespecification_localCopy_S(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              internal_d3q19storagespecification_localCopy_W::d3q19storagespecification_localCopy_W(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              internal_d3q19storagespecification_localCopy_E::d3q19storagespecification_localCopy_E(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : {
+              internal_d3q19storagespecification_localCopy_T::d3q19storagespecification_localCopy_T(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::B : {
+              internal_d3q19storagespecification_localCopy_B::d3q19storagespecification_localCopy_B(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NW : {
+              internal_d3q19storagespecification_localCopy_NW::d3q19storagespecification_localCopy_NW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NE : {
+              internal_d3q19storagespecification_localCopy_NE::d3q19storagespecification_localCopy_NE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SW : {
+              internal_d3q19storagespecification_localCopy_SW::d3q19storagespecification_localCopy_SW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SE : {
+              internal_d3q19storagespecification_localCopy_SE::d3q19storagespecification_localCopy_SE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TN : {
+              internal_d3q19storagespecification_localCopy_TN::d3q19storagespecification_localCopy_TN(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TS : {
+              internal_d3q19storagespecification_localCopy_TS::d3q19storagespecification_localCopy_TS(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TW : {
+              internal_d3q19storagespecification_localCopy_TW::d3q19storagespecification_localCopy_TW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TE : {
+              internal_d3q19storagespecification_localCopy_TE::d3q19storagespecification_localCopy_TE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BN : {
+              internal_d3q19storagespecification_localCopy_BN::d3q19storagespecification_localCopy_BN(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BS : {
+              internal_d3q19storagespecification_localCopy_BS::d3q19storagespecification_localCopy_BS(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BW : {
+              internal_d3q19storagespecification_localCopy_BW::d3q19storagespecification_localCopy_BW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BE : {
+              internal_d3q19storagespecification_localCopy_BE::d3q19storagespecification_localCopy_BE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   
+}  // namespace lbm
+}  // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f0342741639be847c78026bacd7763e10f07463
--- /dev/null
+++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
@@ -0,0 +1,147 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q19StorageSpecification.h
+//! \author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/mpi/SendBuffer.h"
+#include "core/mpi/RecvBuffer.h"
+
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+
+#include "stencil/D3Q19.h"
+#include "stencil/Directions.h"
+
+#define FUNC_PREFIX
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla
+{
+namespace lbm{
+
+// Compile-time description of the D3Q19 lattice storage (stencil, compressibility,
+// streaming pattern) plus the MPI pack/unpack kernels used by the generated
+// communication classes.
+class D3Q19StorageSpecification
+{
+ public:
+   // Used lattice stencil
+   using Stencil = stencil::D3Q19;
+   // Lattice stencil used for the communication (should be used to define which block directions need to be communicated)
+   using CommunicationStencil = stencil::D3Q19;
+
+   // If false used correction: Lattice Boltzmann Model for the Incompressible Navier-Stokes Equation, He 1997
+   static constexpr bool compressible = false;
+   // Cut off for the lattice Boltzmann equilibrium
+   static constexpr int equilibriumAccuracyOrder = 2;
+
+   // If streaming pattern is inplace (esotwist, aa, ...) or not (pull, push)
+   static constexpr bool inplace = false;
+
+   // If true the background deviation (rho_0 = 1) is subtracted for the collision step.
+   static constexpr bool zeroCenteredPDFs = true;
+   // If true the equilibrium is computed in regard to "delta_rho" and not the actual density "rho"
+   static constexpr bool deviationOnlyEquilibrium = true;
+
+   // Compute kernels to pack and unpack MPI buffers
+   class PackKernels {
+
+    public:
+      using PdfField_T = field::GhostLayerField<double, 19>;
+      using value_type = typename PdfField_T::value_type;
+
+      static constexpr bool inplace = false;
+
+      /**
+       * Packs all pdfs from the given cell interval to the send buffer.
+       * */
+      void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const;
+
+      /**
+       * Unpacks all pdfs from the send buffer to the given cell interval.
+       * */
+      void unpackAll(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer) const;
+
+      /**
+       * Copies data between two blocks on the same process.
+       * All pdfs from the sending interval are copied onto the receiving interval.
+       * */
+      void localCopyAll(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval) const;
+
+      /**
+       * Packs only those populations streaming in directions aligned with the sending direction dir from the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are packed.
+       * */
+      void packDirection(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir) const;
+
+      /**
+       * Unpacks only those populations streaming in directions aligned with the sending direction dir to the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are unpacked.
+       * */
+      void unpackDirection(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir) const;
+
+      /** Copies data between two blocks on the same process.
+        * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
+        * */
+      void localCopyDirection(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, stencil::Direction dir) const;
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packDirection / unpackDirection
+       * @param ci  The cell interval
+       * @param dir The communication direction
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (const CellInterval & ci, stencil::Direction dir) const {
+         return ci.numCells() * sizes[dir] * sizeof(value_type);
+      }
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packAll / unpackAll
+       * @param ci  The cell interval
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (const CellInterval & ci) const {
+         return ci.numCells() * 19 * sizeof(value_type);
+      }
+
+    private:
+      // Number of PDFs communicated per cell for each stencil direction
+      // (indexed by stencil::Direction): 5 per face, 1 per edge, 0 otherwise.
+      static constexpr uint_t sizes[27] { 0, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+   };
+
+};
+
+}} //lbm/walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ecdf88928bf8292254465e0f4ec19d4a1106373
--- /dev/null
+++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
@@ -0,0 +1,3099 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q27StorageSpecification.cpp
+//! \author lbmpy
+//======================================================================================================================
+
+#include "D3Q27StorageSpecification.h"
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+/*************************************************************************************
+ *                                Kernel Definitions
+*************************************************************************************/
+namespace internal_d3q27storagespecification_pack_ALL {
+// Serialises all 27 PDF components of every cell of the source interval into a
+// contiguous send buffer. Buffer layout is cell-major: the 27 PDFs of one cell
+// occupy 27 consecutive doubles (offset 27*cellIndex + q).
+// Auto-generated by lbmpy; the fully unrolled pointer setup is intentional
+// (enables vectorization via the RESTRICT qualifiers) -- do not edit by hand.
+static FUNC_PREFIX void d3q27storagespecification_pack_ALL(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      // Base pointers of all 27 PDF components (q = 0..26) for the current x-slice;
+      // component q lives at offset q*_stride_pdfs_src_3.
+      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
+      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         // Row pointers of the 27 components for the current y-line.
+         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
+         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
+         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
+         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
+         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            // Copy the 27 PDFs of this cell into 27 consecutive buffer slots;
+            // the buffer index is 27 * linearised(ctr_0, ctr_1, ctr_2) + q.
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 19] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 20] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 21] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 22] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 23] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 24] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 25] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 26] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_ALL {
+// Inverse of pack_ALL: scatters a contiguous receive buffer back into the
+// destination PDF field. Expects the same cell-major buffer layout that
+// pack_ALL produces (27 consecutive doubles per cell, offset 27*cellIndex + q).
+// Auto-generated by lbmpy; do not edit by hand.
+static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of all 27 PDF components (q = 0..26) for the current x-slice;
+      // component q lives at offset q*_stride_pdfs_dst_3.
+      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
+      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         // Row pointers of the 27 components for the current y-line.
+         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
+         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
+         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
+         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
+         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Read the 27 consecutive buffer slots of this cell back into the
+            // 27 PDF components; buffer index is 27 * linearised cell index + q.
+            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2];
+            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 1];
+            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 2];
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 3];
+            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 4];
+            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 5];
+            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 6];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 7];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 8];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 9];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 10];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 11];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 12];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 13];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 14];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 15];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 16];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 17];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 18];
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 19];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 20];
+            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 21];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 22];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 23];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 24];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 25];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 26];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_ALL {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
+      double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
+      double * RESTRICT  _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
+         double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
+         double * RESTRICT  _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
+         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
+         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
+         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
+         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
+         double * RESTRICT  _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
+         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q27storagespecification_pack_T {
+// NOTE(review): auto-generated kernel (pystencils/lbmpy) inside a patch file --
+// regenerate instead of hand-editing; code lines must stay byte-identical.
+// Packs the 9 PDF components with f-index 5, 11, 12, 13, 14, 19, 20, 21, 22
+// (the set sent to the 'T' neighbor, per the function name) of every cell in
+// the _size_pdfs_src_{0,1,2} region into the dense buffer _data_buffer,
+// 9 contiguous doubles per cell, looping ctr_0 (x), ctr_1 (y), ctr_2 (z).
+static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      // Per-x base pointers, one per packed f-index (offset along dim 3).
+      double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            // Buffer layout: row-major over (ctr_0, ctr_1, ctr_2), 9 values/cell.
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BN {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs f-indices 15, 23, 24 (the 'BN' direction set, per the name) of each
+// cell into _data_buffer, 3 doubles per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_NE {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs f-indices 8, 19, 23 (the 'NE' direction set, per the name) of each
+// cell into _data_buffer, 3 doubles per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BNE {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs the single f-index 23 (corner direction 'BNE', per the name) of each
+// cell into _data_buffer, 1 double per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_SE {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs f-indices 10, 21, 25 (the 'SE' direction set, per the name) of each
+// cell into _data_buffer, 3 doubles per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TNW {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs the single f-index 20 (corner direction 'TNW', per the name) of each
+// cell into _data_buffer, 1 double per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_W {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs the 9 PDF components with f-index 3, 7, 9, 13, 17, 20, 22, 24, 26
+// (the 'W' direction set, per the name) of every cell into _data_buffer,
+// 9 contiguous doubles per cell, looping ctr_0 (x), ctr_1 (y), ctr_2 (z).
+static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TE {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs f-indices 14, 19, 21 (the 'TE' direction set, per the name) of each
+// cell into _data_buffer, 3 doubles per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_N {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs the 9 PDF components with f-index 1, 7, 8, 11, 15, 19, 20, 23, 24
+// (the 'N' direction set, per the name) of every cell into _data_buffer,
+// 9 contiguous doubles per cell, looping ctr_0 (x), ctr_1 (y), ctr_2 (z).
+static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      // f-index 1: offset is just _stride_pdfs_src_3 (coefficient 1 omitted).
+      double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BSW {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs the single f-index 26 (corner direction 'BSW', per the name) of each
+// cell into _data_buffer, 1 double per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TSW {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs the single f-index 22 (corner direction 'TSW', per the name) of each
+// cell into _data_buffer, 1 double per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_TSW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BE {
+// Auto-generated (pystencils/lbmpy); regenerate rather than hand-edit.
+// Packs f-indices 18, 23, 25 (the 'BE' direction set, per the name) of each
+// cell into _data_buffer, 3 doubles per cell, x/y/z loop order.
+static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_B {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 9 PDF components 6, 15, 16, 17, 18, 23, 24, 25, 26 for the B
+// (bottom) face neighbour; 9 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      // Per-component base pointers for the current x-slice.
+      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TNE {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the single PDF component 19 for the TNE corner neighbour into the
+// send buffer at the dense row-major offset (ctr_0, ctr_1, ctr_2).
+static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TS {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 12, 21 and 22 for the TS edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TN {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 11, 19 and 20 for the TN edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BNW {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the single PDF component 24 for the BNW corner neighbour into the
+// send buffer at the dense row-major offset (ctr_0, ctr_1, ctr_2).
+static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TW {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 13, 20 and 22 for the TW edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_TW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BSE {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the single PDF component 25 for the BSE corner neighbour into the
+// send buffer at the dense row-major offset (ctr_0, ctr_1, ctr_2).
+static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_NW {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 7, 20 and 24 for the NW edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_S {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 9 PDF components 2, 9, 10, 12, 16, 21, 22, 25, 26 for the S
+// (south) face neighbour; 9 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      // Per-component base pointers for the current x-slice.
+      double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BS {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 16, 25 and 26 for the BS edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_BS(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_TSE {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the single PDF component 21 for the TSE corner neighbour. Note the
+// matching unpack_TSE below writes component 24 — presumably the opposite
+// direction of 21 in the generator's D3Q27 ordering (confirm against stencil).
+static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_SW {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 9, 22 and 26 for the SW edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_BW {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 3 PDF components 17, 24 and 26 for the BW edge neighbour;
+// 3 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_pack_E {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Packs the 9 PDF components 4, 8, 10, 14, 18, 19, 21, 23, 25 for the E
+// (east) face neighbour; 9 values are interleaved per cell in the buffer.
+static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT  _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+   {
+      // Per-component base pointers for the current x-slice.
+      double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
+         double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+         {
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TSE {
+// Generated communication kernel (pystencils/lbmpy) — do not edit by hand.
+// Inverse of a pack kernel: reads contiguous values from the receive buffer
+// (dense row-major offset over ctr_0, ctr_1, ctr_2) and scatters them into
+// PDF component 24 of the strided destination field for the TSE neighbour.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_T {
+// Scatter kernel: reads nine interleaved doubles per cell from the receive
+// buffer (row-major over ctr_0/ctr_1/ctr_2, stride 9 per cell) and writes
+// them into PDF components 6, 15, 16, 17, 18, 23, 24, 25, 26 of the strided
+// destination field. "T" presumably denotes the top communication direction
+// of the D3Q27 stencil — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the nine PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 0..8 map, in order, onto the component list above.
+            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TN {
+// Scatter kernel: reads three interleaved doubles per cell from the receive
+// buffer (stride 3 per cell) into PDF components 16, 25 and 26 of the
+// strided destination field. "TN" presumably denotes the top-north
+// communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the three PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_SW {
+// Scatter kernel: reads three interleaved doubles per cell from the receive
+// buffer (stride 3 per cell) into PDF components 8, 19 and 23 of the
+// strided destination field. "SW" presumably denotes the south-west
+// communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the three PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TNE {
+// Scatter kernel: copies one double per cell from the contiguous receive
+// buffer into PDF component 26 of the strided destination field. "TNE"
+// presumably denotes the top-north-east corner direction — confirm against
+// the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BN {
+// Scatter kernel: reads three interleaved doubles per cell from the receive
+// buffer (stride 3 per cell) into PDF components 12, 21 and 22 of the
+// strided destination field. "BN" presumably denotes the bottom-north
+// communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the three PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
+            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_W {
+// Scatter kernel: reads nine interleaved doubles per cell from the receive
+// buffer (stride 9 per cell) into PDF components 4, 8, 10, 14, 18, 19, 21,
+// 23 and 25 of the strided destination field. "W" presumably denotes the
+// west communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the nine PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 0..8 map, in order, onto the component list above.
+            _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
+            _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
+            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_E {
+// Scatter kernel: reads nine interleaved doubles per cell from the receive
+// buffer (stride 9 per cell) into PDF components 3, 7, 9, 13, 17, 20, 22,
+// 24 and 26 of the strided destination field. "E" presumably denotes the
+// east communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the nine PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 0..8 map, in order, onto the component list above.
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BNE {
+// Scatter kernel: copies one double per cell from the contiguous receive
+// buffer into PDF component 22 of the strided destination field. "BNE"
+// presumably denotes the bottom-north-east corner direction — confirm
+// against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TNW {
+// Scatter kernel: copies one double per cell from the contiguous receive
+// buffer into PDF component 25 of the strided destination field. "TNW"
+// presumably denotes the top-north-west corner direction — confirm against
+// the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BSE {
+// Scatter kernel: copies one double per cell from the contiguous receive
+// buffer into PDF component 20 of the strided destination field. "BSE"
+// presumably denotes the bottom-south-east corner direction — confirm
+// against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BSW {
+// Scatter kernel: copies one double per cell from the contiguous receive
+// buffer into PDF component 19 of the strided destination field. "BSW"
+// presumably denotes the bottom-south-west corner direction — confirm
+// against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_SE {
+// Scatter kernel: reads three interleaved doubles per cell from the receive
+// buffer (stride 3 per cell) into PDF components 7, 20 and 24 of the
+// strided destination field. "SE" presumably denotes the south-east
+// communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the three PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_N {
+// Scatter kernel: reads nine interleaved doubles per cell from the receive
+// buffer (stride 9 per cell) into PDF components 2, 9, 10, 12, 16, 21, 22,
+// 25 and 26 of the strided destination field. "N" presumably denotes the
+// north communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the nine PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
+         double * RESTRICT  _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            // Buffer offsets 0..8 map, in order, onto the component list above.
+            _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
+            _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
+            _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
+            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_NE {
+// Scatter kernel: reads three interleaved doubles per cell from the receive
+// buffer (stride 3 per cell) into PDF components 9, 22 and 26 of the
+// strided destination field. "NE" presumably denotes the north-east
+// communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the three PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TE {
+// Scatter kernel: reads three interleaved doubles per cell from the receive
+// buffer (stride 3 per cell) into PDF components 17, 24 and 26 of the
+// strided destination field. "TE" presumably denotes the top-east
+// communication direction — confirm against the stencil definition.
+// NOTE(review): lbmpy/pystencils-style generated code — prefer regenerating
+// over hand-editing.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      // Base pointers of the three PDF components written by this direction.
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+         {
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_B {
+// Unpack B-direction ghost-layer data: reads 9 PDF components
+// (f = 5, 11, 12, 13, 14, 19, 20, 21, 22) per cell from the linearised
+// communication buffer and writes them into the destination field.
+// Buffer layout: 9 consecutive doubles per cell, cells ordered
+// ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (9 values per cell)
+            const int64_t buf = 9*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 5*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 11*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 12*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+            _data_pdfs_dst[cell + 13*_stride_pdfs_dst_3] = _data_buffer[buf + 3];
+            _data_pdfs_dst[cell + 14*_stride_pdfs_dst_3] = _data_buffer[buf + 4];
+            _data_pdfs_dst[cell + 19*_stride_pdfs_dst_3] = _data_buffer[buf + 5];
+            _data_pdfs_dst[cell + 20*_stride_pdfs_dst_3] = _data_buffer[buf + 6];
+            _data_pdfs_dst[cell + 21*_stride_pdfs_dst_3] = _data_buffer[buf + 7];
+            _data_pdfs_dst[cell + 22*_stride_pdfs_dst_3] = _data_buffer[buf + 8];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_NW {
+// Unpack NW-direction ghost-layer data: reads 3 PDF components (f = 10, 21, 25)
+// per cell from the linearised communication buffer and writes them into the
+// destination field. Buffer layout: 3 consecutive doubles per cell, cells
+// ordered ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (3 values per cell)
+            const int64_t buf = 3*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 10*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 21*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 25*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_S {
+// Unpack S-direction ghost-layer data: reads 9 PDF components
+// (f = 1, 7, 8, 11, 15, 19, 20, 23, 24) per cell from the linearised
+// communication buffer and writes them into the destination field.
+// Buffer layout: 9 consecutive doubles per cell, cells ordered
+// ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (9 values per cell)
+            const int64_t buf = 9*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + _stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 7*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 8*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+            _data_pdfs_dst[cell + 11*_stride_pdfs_dst_3] = _data_buffer[buf + 3];
+            _data_pdfs_dst[cell + 15*_stride_pdfs_dst_3] = _data_buffer[buf + 4];
+            _data_pdfs_dst[cell + 19*_stride_pdfs_dst_3] = _data_buffer[buf + 5];
+            _data_pdfs_dst[cell + 20*_stride_pdfs_dst_3] = _data_buffer[buf + 6];
+            _data_pdfs_dst[cell + 23*_stride_pdfs_dst_3] = _data_buffer[buf + 7];
+            _data_pdfs_dst[cell + 24*_stride_pdfs_dst_3] = _data_buffer[buf + 8];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TSW {
+// Unpack TSW-direction ghost-layer data: reads the single PDF component
+// f = 23 per cell from the linearised communication buffer and writes it
+// into the destination field. Cells are ordered ctr_0-major, then ctr_1,
+// then ctr_2, one double per cell.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // linear buffer index of this cell (1 value per cell)
+            const int64_t buf = _size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z;
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 23*_stride_pdfs_dst_3] = _data_buffer[buf];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BE {
+// Unpack BE-direction ghost-layer data: reads 3 PDF components (f = 13, 20, 22)
+// per cell from the linearised communication buffer and writes them into the
+// destination field. Buffer layout: 3 consecutive doubles per cell, cells
+// ordered ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (3 values per cell)
+            const int64_t buf = 3*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 13*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 20*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 22*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BS {
+// Unpack BS-direction ghost-layer data: reads 3 PDF components (f = 11, 19, 20)
+// per cell from the linearised communication buffer and writes them into the
+// destination field. Buffer layout: 3 consecutive doubles per cell, cells
+// ordered ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (3 values per cell)
+            const int64_t buf = 3*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 11*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 19*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 20*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BW {
+// Unpack BW-direction ghost-layer data: reads 3 PDF components (f = 14, 19, 21)
+// per cell from the linearised communication buffer and writes them into the
+// destination field. Buffer layout: 3 consecutive doubles per cell, cells
+// ordered ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (3 values per cell)
+            const int64_t buf = 3*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 14*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 19*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 21*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TS {
+// Unpack TS-direction ghost-layer data: reads 3 PDF components (f = 15, 23, 24)
+// per cell from the linearised communication buffer and writes them into the
+// destination field. Buffer layout: 3 consecutive doubles per cell, cells
+// ordered ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (3 values per cell)
+            const int64_t buf = 3*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 15*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 23*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 24*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_BNW {
+// Unpack BNW-direction ghost-layer data: reads the single PDF component
+// f = 21 per cell from the linearised communication buffer and writes it
+// into the destination field. Cells are ordered ctr_0-major, then ctr_1,
+// then ctr_2, one double per cell.
+static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // linear buffer index of this cell (1 value per cell)
+            const int64_t buf = _size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z;
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 21*_stride_pdfs_dst_3] = _data_buffer[buf];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_unpack_TW {
+// Unpack TW-direction ghost-layer data: reads 3 PDF components (f = 18, 23, 25)
+// per cell from the linearised communication buffer and writes them into the
+// destination field. Buffer layout: 3 consecutive doubles per cell, cells
+// ordered ctr_0-major, then ctr_1, then ctr_2.
+static FUNC_PREFIX void d3q27storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT  _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // first buffer slot of this cell (3 values per cell)
+            const int64_t buf = 3*(_size_pdfs_dst_2*(_size_pdfs_dst_1*x + y) + z);
+            // base offset of the cell inside the destination field
+            const int64_t cell = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            _data_pdfs_dst[cell + 18*_stride_pdfs_dst_3] = _data_buffer[buf];
+            _data_pdfs_dst[cell + 23*_stride_pdfs_dst_3] = _data_buffer[buf + 1];
+            _data_pdfs_dst[cell + 25*_stride_pdfs_dst_3] = _data_buffer[buf + 2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_SE {
+// Local (same-process) ghost-layer exchange for the SE direction: copies PDF
+// components f = 10, 21, 25 cell-by-cell from the source field into the
+// destination field, honouring the independent strides of both fields.
+static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // base offsets of the current cell in both fields
+            const int64_t dst = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            const int64_t src = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+            _data_pdfs_dst[dst + 10*_stride_pdfs_dst_3] = _data_pdfs_src[src + 10*_stride_pdfs_src_3];
+            _data_pdfs_dst[dst + 21*_stride_pdfs_dst_3] = _data_pdfs_src[src + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[dst + 25*_stride_pdfs_dst_3] = _data_pdfs_src[src + 25*_stride_pdfs_src_3];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TS {
+// Local (same-process) ghost-layer exchange for the TS direction: copies PDF
+// components f = 12, 21, 22 cell-by-cell from the source field into the
+// destination field, honouring the independent strides of both fields.
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // base offsets of the current cell in both fields
+            const int64_t dst = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            const int64_t src = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+            _data_pdfs_dst[dst + 12*_stride_pdfs_dst_3] = _data_pdfs_src[src + 12*_stride_pdfs_src_3];
+            _data_pdfs_dst[dst + 21*_stride_pdfs_dst_3] = _data_pdfs_src[src + 21*_stride_pdfs_src_3];
+            _data_pdfs_dst[dst + 22*_stride_pdfs_dst_3] = _data_pdfs_src[src + 22*_stride_pdfs_src_3];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BNW {
+// Local (same-process) ghost-layer exchange for the BNW direction: copies the
+// single PDF component f = 24 cell-by-cell from the source field into the
+// destination field, honouring the independent strides of both fields.
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // base offsets of the current cell in both fields
+            const int64_t dst = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            const int64_t src = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+            _data_pdfs_dst[dst + 24*_stride_pdfs_dst_3] = _data_pdfs_src[src + 24*_stride_pdfs_src_3];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TSW {
+// Local (same-process) ghost-layer exchange for the TSW direction: copies the
+// single PDF component f = 22 cell-by-cell from the source field into the
+// destination field, honouring the independent strides of both fields.
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // base offsets of the current cell in both fields
+            const int64_t dst = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            const int64_t src = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+            _data_pdfs_dst[dst + 22*_stride_pdfs_dst_3] = _data_pdfs_src[src + 22*_stride_pdfs_src_3];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TE {
+// Local (same-process) ghost-layer exchange for the TE direction: copies PDF
+// components f = 14, 19, 21 cell-by-cell from the source field into the
+// destination field, honouring the independent strides of both fields.
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   for (int64_t x = 0; x < _size_pdfs_dst_0; x += 1)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; y += 1)
+      {
+         for (int64_t z = 0; z < _size_pdfs_dst_2; z += 1)
+         {
+            // base offsets of the current cell in both fields
+            const int64_t dst = _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y + _stride_pdfs_dst_2*z;
+            const int64_t src = _stride_pdfs_src_0*x + _stride_pdfs_src_1*y + _stride_pdfs_src_2*z;
+            _data_pdfs_dst[dst + 14*_stride_pdfs_dst_3] = _data_pdfs_src[src + 14*_stride_pdfs_src_3];
+            _data_pdfs_dst[dst + 19*_stride_pdfs_dst_3] = _data_pdfs_src[src + 19*_stride_pdfs_src_3];
+            _data_pdfs_dst[dst + 21*_stride_pdfs_dst_3] = _data_pdfs_src[src + 21*_stride_pdfs_src_3];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TNE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies the single PDF component 19 (dim-3 index) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "TNE" likely the top-north-east corner link - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; // base of component 19 within this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy
+         {
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BS {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 16, 25 and 26 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "BS" likely the bottom-south edge link set - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_W {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 3, 7, 9, 13, 17, 20, 22, 24 and 26 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "W" likely the 9 links with a west component - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
+         double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
+         double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TSE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies the single PDF component 21 (dim-3 index) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "TSE" likely the top-south-east corner link - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; // base of component 21 within this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
+         double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy
+         {
+            _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_NE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 8, 19 and 23 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "NE" likely the north-east edge link set - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
+         double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
+         double * RESTRICT  _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
+         double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_B {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 6, 15, 16, 17, 18, 23, 24, 25 and 26 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "B" likely the 9 links with a bottom component - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
+         double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
+         double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
+         double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
+         double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TNW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies the single PDF component 20 (dim-3 index) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "TNW" likely the top-north-west corner link - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; // base of component 20 within this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy
+         {
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_NW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 7, 20 and 24 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "NW" likely the north-west edge link set - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
+         double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BN {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 15, 23 and 24 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "BN" likely the bottom-north edge link set - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
+         double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
+         double * RESTRICT  _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
+         double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 13, 20 and 22 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "TW" likely the top-west edge link set - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
+         double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
+         double * RESTRICT  _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
+         double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
+         double * RESTRICT  _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
+         double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) // Copies PDF components 17, 24 and 26 (dim-3 indices) from src to dst over a _size_pdfs_dst_{0,1,2}-shaped box. NOTE(review): auto-generated kernel (presumably lbmpy/pystencils; "BW" likely the bottom-west edge link set - confirm against codegen); do not hand-edit logic.
+{
+   for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+   {
+      double * RESTRICT  _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; // per-component base pointers for this ctr_0 slice
+      double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+      double * RESTRICT  _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+      double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
+         double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
+         double * RESTRICT  _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
+         double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
+         double * RESTRICT  _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
+         double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
+         for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) // innermost axis: element-wise copy of each selected component
+         {
+            _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+            _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_SW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 9, 22 and 26 (the generator's "SW" slice of the
+   // D3Q27 stencil) from the source field to the destination field over the
+   // given cell interval. Strides/sizes are per-dimension, in units of doubles.
+   const int64_t comps[3] = { 9, 22, 26 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_T {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 5, 11, 12, 13, 14, 19, 20, 21 and 22 (the
+   // generator's "T" slice of the D3Q27 stencil) from the source field to the
+   // destination field over the given cell interval. Strides/sizes are
+   // per-dimension, in units of doubles.
+   const int64_t comps[9] = { 5, 11, 12, 13, 14, 19, 20, 21, 22 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BSW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies the single PDF component 26 (the generator's "BSW" slice of the
+   // D3Q27 stencil) from the source field to the destination field over the
+   // given cell interval. Strides/sizes are per-dimension, in units of doubles.
+   const int64_t fDst = 26 * _stride_pdfs_dst_3;
+   const int64_t fSrc = 26 * _stride_pdfs_src_3;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x + fDst;
+      const int64_t xSrc = _stride_pdfs_src_0 * x + fSrc;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            _data_pdfs_dst[yDst + _stride_pdfs_dst_2 * z] = _data_pdfs_src[ySrc + _stride_pdfs_src_2 * z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_S {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 2, 9, 10, 12, 16, 21, 22, 25 and 26 (the
+   // generator's "S" slice of the D3Q27 stencil) from the source field to the
+   // destination field over the given cell interval. Strides/sizes are
+   // per-dimension, in units of doubles.
+   const int64_t comps[9] = { 2, 9, 10, 12, 16, 21, 22, 25, 26 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_TN {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 11, 19 and 20 (the generator's "TN" slice of the
+   // D3Q27 stencil) from the source field to the destination field over the
+   // given cell interval. Strides/sizes are per-dimension, in units of doubles.
+   const int64_t comps[3] = { 11, 19, 20 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_E {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 4, 8, 10, 14, 18, 19, 21, 23 and 25 (the
+   // generator's "E" slice of the D3Q27 stencil) from the source field to the
+   // destination field over the given cell interval. Strides/sizes are
+   // per-dimension, in units of doubles.
+   const int64_t comps[9] = { 4, 8, 10, 14, 18, 19, 21, 23, 25 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_N {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 1, 7, 8, 11, 15, 19, 20, 23 and 24 (the
+   // generator's "N" slice of the D3Q27 stencil) from the source field to the
+   // destination field over the given cell interval. Strides/sizes are
+   // per-dimension, in units of doubles.
+   const int64_t comps[9] = { 1, 7, 8, 11, 15, 19, 20, 23, 24 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BSE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies the single PDF component 25 (the generator's "BSE" slice of the
+   // D3Q27 stencil) from the source field to the destination field over the
+   // given cell interval. Strides/sizes are per-dimension, in units of doubles.
+   const int64_t fDst = 25 * _stride_pdfs_dst_3;
+   const int64_t fSrc = 25 * _stride_pdfs_src_3;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x + fDst;
+      const int64_t xSrc = _stride_pdfs_src_0 * x + fSrc;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            _data_pdfs_dst[yDst + _stride_pdfs_dst_2 * z] = _data_pdfs_src[ySrc + _stride_pdfs_src_2 * z];
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Copies PDF components 18, 23 and 25 (the generator's "BE" slice of the
+   // D3Q27 stencil) from the source field to the destination field over the
+   // given cell interval. Strides/sizes are per-dimension, in units of doubles.
+   const int64_t comps[3] = { 18, 23, 25 };
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      const int64_t xDst = _stride_pdfs_dst_0 * x;
+      const int64_t xSrc = _stride_pdfs_src_0 * x;
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         const int64_t yDst = xDst + _stride_pdfs_dst_1 * y;
+         const int64_t ySrc = xSrc + _stride_pdfs_src_1 * y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            // Base offset of the current cell in each field.
+            const int64_t cellDst = yDst + _stride_pdfs_dst_2 * z;
+            const int64_t cellSrc = ySrc + _stride_pdfs_src_2 * z;
+            for (const int64_t f : comps)
+            {
+               _data_pdfs_dst[cellDst + f * _stride_pdfs_dst_3] = _data_pdfs_src[cellSrc + f * _stride_pdfs_src_3];
+            }
+         }
+      }
+   }
+}
+}
+
+namespace internal_d3q27storagespecification_localCopy_BNE {
+// Copies the single PDF component exchanged across a BNE block link (f-index 23)
+// from the src field into the dst field over the given cell box.
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT  _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+{
+   // Select f = 23 once; the nested loops then walk the x, y and z extents.
+   double * RESTRICT dstBase = _data_pdfs_dst + 23*_stride_pdfs_dst_3;
+   double * RESTRICT const srcBase = _data_pdfs_src + 23*_stride_pdfs_src_3;
+   for (int64_t x = 0; x < _size_pdfs_dst_0; ++x)
+   {
+      for (int64_t y = 0; y < _size_pdfs_dst_1; ++y)
+      {
+         double * RESTRICT dstLine = dstBase + _stride_pdfs_dst_0*x + _stride_pdfs_dst_1*y;
+         double * RESTRICT const srcLine = srcBase + _stride_pdfs_src_0*x + _stride_pdfs_src_1*y;
+         for (int64_t z = 0; z < _size_pdfs_dst_2; ++z)
+         {
+            dstLine[_stride_pdfs_dst_2*z] = srcLine[_stride_pdfs_src_2*z];
+         }
+      }
+   }
+}
+}
+
+
+
+
+/*************************************************************************************
+ *                                 Kernel Wrappers
+*************************************************************************************/
+
+namespace walberla {
+namespace lbm {
+
+   void D3Q27StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const
+   {
+      // Serialises every PDF component of every cell in ci into the raw send buffer.
+      double * RESTRICT _data_buffer = reinterpret_cast<double*>(outBuffer);
+
+      // The interval may reach into the ghost layers, but never beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+
+      const int64_t _size_pdfs_src_0 = int64_c(ci.xSize());
+      const int64_t _size_pdfs_src_1 = int64_c(ci.ySize());
+      const int64_t _size_pdfs_src_2 = int64_c(ci.zSize());
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->xSizeWithGhostLayer(), _size_pdfs_src_0)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->ySizeWithGhostLayer(), _size_pdfs_src_1)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->zSizeWithGhostLayer(), _size_pdfs_src_2)
+
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(pdfs_src->fStride());
+      internal_d3q27storagespecification_pack_ALL::d3q27storagespecification_pack_ALL(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+   }
+
+
+   void D3Q27StorageSpecification::PackKernels::unpackAll(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer) const
+   {
+      // Restores every PDF component of every cell in ci from the raw receive buffer.
+      double * RESTRICT const _data_buffer = reinterpret_cast<double*>(inBuffer);
+
+      // The interval may reach into the ghost layers, but never beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+
+      const int64_t _size_pdfs_dst_0 = int64_c(ci.xSize());
+      const int64_t _size_pdfs_dst_1 = int64_c(ci.ySize());
+      const int64_t _size_pdfs_dst_2 = int64_c(ci.zSize());
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), _size_pdfs_dst_0)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), _size_pdfs_dst_1)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), _size_pdfs_dst_2)
+
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(pdfs_dst->fStride());
+      internal_d3q27storagespecification_unpack_ALL::d3q27storagespecification_unpack_ALL(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+   }
+
+
+   void D3Q27StorageSpecification::PackKernels::localCopyAll(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval) const
+   {
+      // Copies every PDF component from srcInterval of pdfs_src into dstInterval of
+      // pdfs_dst. Both intervals must span the same number of cells in each dimension.
+      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+      // Intervals may reach into the ghost layers, but never beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+
+      double * RESTRICT _data_pdfs_dst = pdfs_dst->dataAt(dstInterval.xMin(), dstInterval.yMin(), dstInterval.zMin(), 0);
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(srcInterval.xMin(), srcInterval.yMin(), srcInterval.zMin(), 0);
+
+      const int64_t _size_pdfs_dst_0 = int64_c(dstInterval.xSize());
+      const int64_t _size_pdfs_dst_1 = int64_c(dstInterval.ySize());
+      const int64_t _size_pdfs_dst_2 = int64_c(dstInterval.zSize());
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), _size_pdfs_dst_0)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), _size_pdfs_dst_1)
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), _size_pdfs_dst_2)
+
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(pdfs_dst->fStride());
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(pdfs_src->fStride());
+      internal_d3q27storagespecification_localCopy_ALL::d3q27storagespecification_localCopy_ALL(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+   }
+
+   // Packs, for the given communication direction dir, exactly the PDF components
+   // that stream across that block boundary from the cell interval ci into outBuffer.
+   // The buffer is reinterpreted as doubles; directions not in the D3Q27 stencil are a no-op.
+   void D3Q27StorageSpecification::PackKernels::packDirection(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir) const
+   {
+      double * buffer = reinterpret_cast<double*>(outBuffer);
+      double * RESTRICT  _data_buffer = buffer;
+      // The interval may reach into the ghost layers, but never beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_src_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_src_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_src->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_src_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      // Dispatch to the generated per-direction pack kernel; each kernel copies
+      // only the PDF components whose lattice velocity points in dir.
+      switch (dir) {
+          case stencil::N : {
+              internal_d3q27storagespecification_pack_N::d3q27storagespecification_pack_N(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              internal_d3q27storagespecification_pack_S::d3q27storagespecification_pack_S(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              internal_d3q27storagespecification_pack_W::d3q27storagespecification_pack_W(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              internal_d3q27storagespecification_pack_E::d3q27storagespecification_pack_E(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : {
+              internal_d3q27storagespecification_pack_T::d3q27storagespecification_pack_T(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::B : {
+              internal_d3q27storagespecification_pack_B::d3q27storagespecification_pack_B(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NW : {
+              internal_d3q27storagespecification_pack_NW::d3q27storagespecification_pack_NW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NE : {
+              internal_d3q27storagespecification_pack_NE::d3q27storagespecification_pack_NE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SW : {
+              internal_d3q27storagespecification_pack_SW::d3q27storagespecification_pack_SW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SE : {
+              internal_d3q27storagespecification_pack_SE::d3q27storagespecification_pack_SE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TN : {
+              internal_d3q27storagespecification_pack_TN::d3q27storagespecification_pack_TN(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TS : {
+              internal_d3q27storagespecification_pack_TS::d3q27storagespecification_pack_TS(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TW : {
+              internal_d3q27storagespecification_pack_TW::d3q27storagespecification_pack_TW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TE : {
+              internal_d3q27storagespecification_pack_TE::d3q27storagespecification_pack_TE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BN : {
+              internal_d3q27storagespecification_pack_BN::d3q27storagespecification_pack_BN(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BS : {
+              internal_d3q27storagespecification_pack_BS::d3q27storagespecification_pack_BS(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BW : {
+              internal_d3q27storagespecification_pack_BW::d3q27storagespecification_pack_BW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BE : {
+              internal_d3q27storagespecification_pack_BE::d3q27storagespecification_pack_BE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TNE : {
+              internal_d3q27storagespecification_pack_TNE::d3q27storagespecification_pack_TNE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TNW : {
+              internal_d3q27storagespecification_pack_TNW::d3q27storagespecification_pack_TNW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TSE : {
+              internal_d3q27storagespecification_pack_TSE::d3q27storagespecification_pack_TSE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TSW : {
+              internal_d3q27storagespecification_pack_TSW::d3q27storagespecification_pack_TSW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BNE : {
+              internal_d3q27storagespecification_pack_BNE::d3q27storagespecification_pack_BNE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BNW : {
+              internal_d3q27storagespecification_pack_BNW::d3q27storagespecification_pack_BNW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BSE : {
+              internal_d3q27storagespecification_pack_BSE::d3q27storagespecification_pack_BSE(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BSW : {
+              internal_d3q27storagespecification_pack_BSW::d3q27storagespecification_pack_BSW(_data_buffer, _data_pdfs_src, _size_pdfs_src_0, _size_pdfs_src_1, _size_pdfs_src_2, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   // Unpacks, for the given communication direction dir, exactly the PDF components
+   // received across that block boundary from inBuffer into the cell interval ci.
+   // The buffer is reinterpreted as doubles; directions not in the D3Q27 stencil are a no-op.
+   void D3Q27StorageSpecification::PackKernels::unpackDirection(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir) const
+   {
+      double * buffer = reinterpret_cast<double*>(inBuffer);
+      double * RESTRICT const _data_buffer = buffer;
+      // The interval may reach into the ghost layers, but never beyond them.
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(ci.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(ci.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(ci.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      // Dispatch to the generated per-direction unpack kernel; each kernel writes
+      // only the PDF components received across a boundary of that orientation.
+      switch (dir) {
+          case stencil::N : {
+              internal_d3q27storagespecification_unpack_N::d3q27storagespecification_unpack_N(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::S : {
+              internal_d3q27storagespecification_unpack_S::d3q27storagespecification_unpack_S(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::W : {
+              internal_d3q27storagespecification_unpack_W::d3q27storagespecification_unpack_W(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::E : {
+              internal_d3q27storagespecification_unpack_E::d3q27storagespecification_unpack_E(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::T : {
+              internal_d3q27storagespecification_unpack_T::d3q27storagespecification_unpack_T(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::B : {
+              internal_d3q27storagespecification_unpack_B::d3q27storagespecification_unpack_B(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NW : {
+              internal_d3q27storagespecification_unpack_NW::d3q27storagespecification_unpack_NW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::NE : {
+              internal_d3q27storagespecification_unpack_NE::d3q27storagespecification_unpack_NE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SW : {
+              internal_d3q27storagespecification_unpack_SW::d3q27storagespecification_unpack_SW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::SE : {
+              internal_d3q27storagespecification_unpack_SE::d3q27storagespecification_unpack_SE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TN : {
+              internal_d3q27storagespecification_unpack_TN::d3q27storagespecification_unpack_TN(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TS : {
+              internal_d3q27storagespecification_unpack_TS::d3q27storagespecification_unpack_TS(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TW : {
+              internal_d3q27storagespecification_unpack_TW::d3q27storagespecification_unpack_TW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TE : {
+              internal_d3q27storagespecification_unpack_TE::d3q27storagespecification_unpack_TE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BN : {
+              internal_d3q27storagespecification_unpack_BN::d3q27storagespecification_unpack_BN(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BS : {
+              internal_d3q27storagespecification_unpack_BS::d3q27storagespecification_unpack_BS(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BW : {
+              internal_d3q27storagespecification_unpack_BW::d3q27storagespecification_unpack_BW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BE : {
+              internal_d3q27storagespecification_unpack_BE::d3q27storagespecification_unpack_BE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TNE : {
+              internal_d3q27storagespecification_unpack_TNE::d3q27storagespecification_unpack_TNE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TNW : {
+              internal_d3q27storagespecification_unpack_TNW::d3q27storagespecification_unpack_TNW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TSE : {
+              internal_d3q27storagespecification_unpack_TSE::d3q27storagespecification_unpack_TSE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::TSW : {
+              internal_d3q27storagespecification_unpack_TSW::d3q27storagespecification_unpack_TSW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BNE : {
+              internal_d3q27storagespecification_unpack_BNE::d3q27storagespecification_unpack_BNE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BNW : {
+              internal_d3q27storagespecification_unpack_BNW::d3q27storagespecification_unpack_BNW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BSE : {
+              internal_d3q27storagespecification_unpack_BSE::d3q27storagespecification_unpack_BSE(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }
+          case stencil::BSW : {
+              internal_d3q27storagespecification_unpack_BSW::d3q27storagespecification_unpack_BSW(_data_buffer, _data_pdfs_dst, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   // Copies, between two blocks on the same process, only those pdfs whose
+   // streaming direction is aligned with the communication direction `dir`.
+   // Data from `srcInterval` of `pdfs_src` is written onto `dstInterval` of
+   // `pdfs_dst` by dispatching to the matching code-generated kernel.
+   void D3Q27StorageSpecification::PackKernels::localCopyDirection(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, stencil::Direction dir) const
+   {
+      // Source and destination intervals must cover the same number of cells.
+      WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
+      WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
+
+      // Both intervals may reach into the fields' ghost layers, but not beyond.
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.xMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.yMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(dstInterval.zMin(), -int_c(pdfs_dst->nrOfGhostLayers()))
+      double * RESTRICT  _data_pdfs_dst = pdfs_dst->dataAt(dstInterval.xMin(), dstInterval.yMin(), dstInterval.zMin(), 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.xMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.yMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      WALBERLA_ASSERT_GREATER_EQUAL(srcInterval.zMin(), -int_c(pdfs_src->nrOfGhostLayers()))
+      double * RESTRICT const _data_pdfs_src = pdfs_src->dataAt(srcInterval.xMin(), srcInterval.yMin(), srcInterval.zMin(), 0);
+      // Iteration extents (taken from the destination interval) and field
+      // strides in raw element units, as required by the generated kernels.
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->xSizeWithGhostLayer(), int64_t(int64_c(dstInterval.xSize()) + 0))
+      const int64_t _size_pdfs_dst_0 = int64_t(int64_c(dstInterval.xSize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->ySizeWithGhostLayer(), int64_t(int64_c(dstInterval.ySize()) + 0))
+      const int64_t _size_pdfs_dst_1 = int64_t(int64_c(dstInterval.ySize()) + 0);
+      WALBERLA_ASSERT_GREATER_EQUAL(pdfs_dst->zSizeWithGhostLayer(), int64_t(int64_c(dstInterval.zSize()) + 0))
+      const int64_t _size_pdfs_dst_2 = int64_t(int64_c(dstInterval.zSize()) + 0);
+      const int64_t _stride_pdfs_dst_0 = int64_t(pdfs_dst->xStride());
+      const int64_t _stride_pdfs_dst_1 = int64_t(pdfs_dst->yStride());
+      const int64_t _stride_pdfs_dst_2 = int64_t(pdfs_dst->zStride());
+      const int64_t _stride_pdfs_dst_3 = int64_t(1 * int64_t(pdfs_dst->fStride()));
+      const int64_t _stride_pdfs_src_0 = int64_t(pdfs_src->xStride());
+      const int64_t _stride_pdfs_src_1 = int64_t(pdfs_src->yStride());
+      const int64_t _stride_pdfs_src_2 = int64_t(pdfs_src->zStride());
+      const int64_t _stride_pdfs_src_3 = int64_t(1 * int64_t(pdfs_src->fStride()));
+      // Dispatch to the code-generated copy kernel for the requested direction.
+      // Only the 26 non-center D3Q27 directions have aligned populations; any
+      // other value of `dir` falls through to the no-op default.
+      switch (dir) {
+          case stencil::N : {
+              internal_d3q27storagespecification_localCopy_N::d3q27storagespecification_localCopy_N(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::S : {
+              internal_d3q27storagespecification_localCopy_S::d3q27storagespecification_localCopy_S(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::W : {
+              internal_d3q27storagespecification_localCopy_W::d3q27storagespecification_localCopy_W(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::E : {
+              internal_d3q27storagespecification_localCopy_E::d3q27storagespecification_localCopy_E(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::T : {
+              internal_d3q27storagespecification_localCopy_T::d3q27storagespecification_localCopy_T(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::B : {
+              internal_d3q27storagespecification_localCopy_B::d3q27storagespecification_localCopy_B(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NW : {
+              internal_d3q27storagespecification_localCopy_NW::d3q27storagespecification_localCopy_NW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::NE : {
+              internal_d3q27storagespecification_localCopy_NE::d3q27storagespecification_localCopy_NE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SW : {
+              internal_d3q27storagespecification_localCopy_SW::d3q27storagespecification_localCopy_SW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::SE : {
+              internal_d3q27storagespecification_localCopy_SE::d3q27storagespecification_localCopy_SE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TN : {
+              internal_d3q27storagespecification_localCopy_TN::d3q27storagespecification_localCopy_TN(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TS : {
+              internal_d3q27storagespecification_localCopy_TS::d3q27storagespecification_localCopy_TS(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TW : {
+              internal_d3q27storagespecification_localCopy_TW::d3q27storagespecification_localCopy_TW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TE : {
+              internal_d3q27storagespecification_localCopy_TE::d3q27storagespecification_localCopy_TE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BN : {
+              internal_d3q27storagespecification_localCopy_BN::d3q27storagespecification_localCopy_BN(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BS : {
+              internal_d3q27storagespecification_localCopy_BS::d3q27storagespecification_localCopy_BS(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BW : {
+              internal_d3q27storagespecification_localCopy_BW::d3q27storagespecification_localCopy_BW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BE : {
+              internal_d3q27storagespecification_localCopy_BE::d3q27storagespecification_localCopy_BE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TNE : {
+              internal_d3q27storagespecification_localCopy_TNE::d3q27storagespecification_localCopy_TNE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TNW : {
+              internal_d3q27storagespecification_localCopy_TNW::d3q27storagespecification_localCopy_TNW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TSE : {
+              internal_d3q27storagespecification_localCopy_TSE::d3q27storagespecification_localCopy_TSE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::TSW : {
+              internal_d3q27storagespecification_localCopy_TSW::d3q27storagespecification_localCopy_TSW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BNE : {
+              internal_d3q27storagespecification_localCopy_BNE::d3q27storagespecification_localCopy_BNE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BNW : {
+              internal_d3q27storagespecification_localCopy_BNW::d3q27storagespecification_localCopy_BNW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BSE : {
+              internal_d3q27storagespecification_localCopy_BSE::d3q27storagespecification_localCopy_BSE(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }
+          case stencil::BSW : {
+              internal_d3q27storagespecification_localCopy_BSW::d3q27storagespecification_localCopy_BSW(_data_pdfs_dst, _data_pdfs_src, _size_pdfs_dst_0, _size_pdfs_dst_1, _size_pdfs_dst_2, _stride_pdfs_dst_0, _stride_pdfs_dst_1, _stride_pdfs_dst_2, _stride_pdfs_dst_3, _stride_pdfs_src_0, _stride_pdfs_src_1, _stride_pdfs_src_2, _stride_pdfs_src_3);
+              break;
+          }default: break; 
+      }
+   }
+
+   
+}  // namespace lbm
+}  // namespace walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
new file mode 100644
index 0000000000000000000000000000000000000000..49aa692873b51ba5f25590faf9faffd9a018afdc
--- /dev/null
+++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
@@ -0,0 +1,147 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q27StorageSpecification.h
+//! \author lbmpy
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/mpi/SendBuffer.h"
+#include "core/mpi/RecvBuffer.h"
+
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+
+#include "stencil/D3Q27.h"
+#include "stencil/Directions.h"
+
+#define FUNC_PREFIX
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla
+{
+namespace lbm{
+
+// Generated storage specification for the D3Q27 lattice model: compile-time
+// model properties plus the pack/unpack kernels needed for MPI communication
+// and same-process ghost-layer copies of the pdf field.
+class D3Q27StorageSpecification
+{
+ public:
+   // Used lattice stencil
+   using Stencil = stencil::D3Q27;
+   // Lattice stencil used for the communication (should be used to define which block directions need to be communicated)
+   using CommunicationStencil = stencil::D3Q27;
+
+   // If false used correction: Lattice Boltzmann Model for the Incompressible Navier–Stokes Equation, He 1997
+   static const bool compressible = false;
+   // Cut off for the lattice Boltzmann equilibrium
+   static const int equilibriumAccuracyOrder = 2;
+
+   // If streaming pattern is inplace (esotwist, aa, ...) or not (pull, push)
+   static const bool inplace = false;
+
+   // If true the background deviation (rho_0 = 1) is subtracted for the collision step.
+   static const bool zeroCenteredPDFs = true;
+   // If true the equilibrium is computed in regard to "delta_rho" and not the actual density "rho"
+   static const bool deviationOnlyEquilibrium = true;
+
+   // Compute kernels to pack and unpack MPI buffers
+   class PackKernels {
+
+    public:
+      // The pdf field type: one double per D3Q27 population, with ghost layers.
+      using PdfField_T = field::GhostLayerField<double, 27>;
+      using value_type = typename PdfField_T::value_type;
+
+      
+
+      // Mirrors the enclosing class: the streaming pattern is not in-place.
+      static const bool inplace = false;
+
+      /**
+       * Packs all pdfs from the given cell interval to the send buffer.
+       * */
+      void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const;
+
+      /**
+       * Unpacks all pdfs from the send buffer to the given cell interval.
+       * */
+      void unpackAll(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer) const;
+
+      /**
+       * Copies data between two blocks on the same process.
+       * All pdfs from the sending interval are copied onto the receiving interval.
+       * */
+      void localCopyAll(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval) const;
+
+      /**
+       * Packs only those populations streaming in directions aligned with the sending direction dir from the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are packed.
+       * */
+      void packDirection(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer, stencil::Direction dir) const;
+
+      /**
+       * Unpacks only those populations streaming in directions aligned with the sending direction dir to the given cell interval.
+       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are unpacked.
+       * */
+      void unpackDirection(PdfField_T * pdfs_dst, CellInterval & ci, unsigned char * inBuffer, stencil::Direction dir) const;
+
+      /** Copies data between two blocks on the same process.
+        * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
+        * */
+      void localCopyDirection(PdfField_T * pdfs_src, CellInterval & srcInterval, PdfField_T * pdfs_dst, CellInterval & dstInterval, stencil::Direction dir) const;
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packDirection / unpackDirection
+       * @param ci  The cell interval
+       * @param dir The communication direction
+       * @return    The required size of the buffer, in bytes
+       * */
+      // NOTE(review): ci is never modified; `const CellInterval &` would be
+      // preferable — change in the lbmpy_walberla generator, not here.
+      uint_t size (CellInterval & ci, stencil::Direction dir) const {
+         return ci.numCells() * sizes[dir] * sizeof(value_type);
+      }
+
+      /**
+       * Returns the number of bytes that will be packed from / unpacked to the cell interval
+       * when using packAll / unpackAll
+       * @param ci  The cell interval
+       * @return    The required size of the buffer, in bytes
+       * */
+      uint_t size (CellInterval & ci) const {
+         return ci.numCells() * 27 * sizeof(value_type);
+      }
+
+      
+
+    private:
+      // Populations packed per cell for each direction (indexed by
+      // stencil::Direction): 0 for C, 9 per face, 3 per edge, 1 per corner.
+      const uint_t sizes[27] { 0, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1 };
+   };
+
+};
+
+}} //lbm/walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/storage_specification_generation_script.py b/src/lbm_generated/storage_specification/storage_specification_generation_script.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7432ee70d6233edbd4c408199f1d89ae4fe1e6d
--- /dev/null
+++ b/src/lbm_generated/storage_specification/storage_specification_generation_script.py
@@ -0,0 +1,32 @@
+import sympy as sp
+
+from pystencils import Target
+
+from lbmpy.creationfunctions import create_lb_method
+from lbmpy import LBMConfig, Stencil, Method, LBStencil
+from pystencils_walberla import ManualCodeGenerationContext, generate_info_header
+from lbmpy_walberla.storage_specification import generate_lbm_storage_specification
+
+
+# Generate the D3Q19/D3Q27 storage specification classes used by lbm_generated.
+with ManualCodeGenerationContext(openmp=False, optimize_for_localhost=False,
+                                 mpi=True, double_accuracy=True, cuda=False) as ctx:
+
+    # One storage specification per supported stencil.
+    for stencil in [LBStencil(Stencil.D3Q19), LBStencil(Stencil.D3Q27)]:
+        target = Target.GPU if ctx.cuda else Target.CPU
+        data_type = "float64" if ctx.double_accuracy else "float32"
+
+        # The concrete collision model is only needed to build a valid lb_method;
+        # the storage specification itself depends on the stencil, not on SRT.
+        method = Method.SRT
+        relaxation_rate = sp.symbols("omega")
+        streaming_pattern = 'pull'
+        nonuniform = False
+
+        lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate,
+                               streaming_pattern=streaming_pattern)
+
+        lb_method = create_lb_method(lbm_config=lbm_config)
+
+        storage_spec_name = f'{stencil.name}StorageSpecification'
+        generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config,
+                                           nonuniform=nonuniform, target=target, data_type=data_type)
+
+        # NOTE(review): this runs once per loop iteration, re-writing files
+        # generated for earlier stencils; likely intended outside the loop —
+        # confirm against ManualCodeGenerationContext semantics.
+        ctx.write_all_files()
diff --git a/src/lbm_generated/sweep_collection/CMakeLists.txt b/src/lbm_generated/sweep_collection/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91fbfb9d64fa55c4f870875be3c58a3f65a06c98
--- /dev/null
+++ b/src/lbm_generated/sweep_collection/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Register the generated sweep-collection sources with the lbm_generated target.
+target_sources( lbm_generated
+        PRIVATE
+        D3Q19SRT.h
+        D3Q19SRT.cpp
+        D3Q27SRT.h
+        D3Q27SRT.cpp
+        )
\ No newline at end of file
diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2ed08360d0699e51e3e47e1906727ef739a2e17
--- /dev/null
+++ b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
@@ -0,0 +1,1012 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q19SRT.cpp
+//! \author pystencils
+//======================================================================================================================
+#include "D3Q19SRT.h"
+
+#define FUNC_PREFIX
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable :  1599 )
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+
+namespace internal_d3q19srt_kernel_streamCollide {
+static FUNC_PREFIX void d3q19srt_kernel_streamCollide(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, double omega)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
+   {
+      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
+      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
+         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
+         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
+         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
+         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
+         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
+         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
+         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
+         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
+         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
+         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
+         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
+         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
+         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
+         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
+         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
+         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
+         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
+         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
+         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
+         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
+         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
+         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
+         {
+            const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double u0Pu1 = u_0 + u_1;
+            const double u1Pu2 = u_1 + u_2;
+            const double u1Mu2 = u_1 + u_2*-1.0;
+            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u0Pu2 = u_0 + u_2;
+            const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
+            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q19srt_kernel_collide { // in-place D3Q19 single-relaxation-time (BGK) collision; auto-generated code - do not edit by hand
+static FUNC_PREFIX void d3q19srt_kernel_collide(double * RESTRICT  _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double omega) // omega: relaxation rate; sizes/strides describe the pdf field layout (stride_3 separates the 19 directions)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) // z (slowest) coordinate
+   {
+      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; // per-slice base pointers, one per lattice direction q (offset q*_stride_pdfs_3)
+      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) // y coordinate
+      {
+         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; // row pointers within the current y-line
+         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
+         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
+         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
+         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
+         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
+         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
+         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
+         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
+         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
+         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
+         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
+         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
+         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
+         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
+         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) // x (fastest) coordinate
+         {
+            const double xi_1 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0]; // load all 19 pdfs of this cell once
+            const double xi_2 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
+            const double xi_3 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            const double xi_4 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
+            const double xi_5 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
+            const double xi_6 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
+            const double xi_7 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
+            const double xi_8 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
+            const double xi_9 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
+            const double xi_10 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
+            const double xi_11 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
+            const double xi_12 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
+            const double xi_13 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
+            const double xi_14 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
+            const double xi_15 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
+            const double xi_16 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
+            const double xi_17 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
+            const double xi_18 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
+            const double xi_19 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
+            const double vel0Term = xi_15 + xi_17 + xi_2 + xi_8 + xi_9; // partial sums feeding the first-order (velocity) moments
+            const double vel1Term = xi_1 + xi_4 + xi_5 + xi_6;
+            const double vel2Term = xi_11 + xi_13 + xi_19;
+            const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_12 + xi_14 + xi_16 + xi_18 + xi_3 + xi_7; // zeroth moment (presumably density deviation from the background - naming per generator)
+            const double u_0 = vel0Term + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_16*-1.0 + xi_5*-1.0; // first moments: signed sums of pdfs along each axis
+            const double u_1 = vel1Term + xi_12*-1.0 + xi_15*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_7*-1.0 + xi_9;
+            const double u_2 = vel2Term + xi_1*-1.0 + xi_10*-1.0 + xi_16*-1.0 + xi_17 + xi_18*-1.0 + xi_2*-1.0 + xi_6;
+            const double u0Mu1 = u_0 + u_1*-1.0; // pairwise velocity combinations reused by the diagonal directions
+            const double u0Pu1 = u_0 + u_1;
+            const double u1Pu2 = u_1 + u_2;
+            const double u1Mu2 = u_1 + u_2*-1.0;
+            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u0Pu2 = u_0 + u_2;
+            const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2); // shared part of the equilibrium distribution
+            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 + xi_3*-1.0) + xi_3; // each store is f + omega*(f_eq - f), written back in place
+            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + xi_4*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_4;
+            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + xi_7*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_7;
+            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + xi_14*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_14;
+            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + xi_8*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_8;
+            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + xi_13*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_13;
+            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + xi_10*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_10;
+            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + xi_5*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_5;
+            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + xi_9*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_9;
+            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + xi_12*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_12;
+            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + xi_15*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15;
+            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + xi_6*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_6;
+            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + xi_19*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_19;
+            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + xi_11*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_11;
+            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + xi_17*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_17;
+            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + xi_1*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_1;
+            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + xi_18*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_18;
+            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + xi_16*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_16;
+            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + xi_2*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_2;
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q19srt_kernel_stream { // D3Q19 pull-stream: copies each pdf from its upstream neighbour into pdfs_tmp; auto-generated code - do not edit by hand
+static FUNC_PREFIX void d3q19srt_kernel_stream(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3) // no collision here; RESTRICT implies src and dst fields do not alias
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) // z (slowest) coordinate
+   {
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; // source slice pointers; suffixes _2m1/_21 denote the z-1/z+1 neighbour slices
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; // destination slice pointers into the temporary field (always the current cell)
+      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) // y coordinate
+      {
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; // source row pointers; suffixes _1m1/_11 denote the y-1/y+1 neighbour rows
+         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
+         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
+         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
+         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
+         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
+         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
+         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; // destination row pointers
+         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
+         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
+         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
+         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
+         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
+         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
+         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
+         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
+         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
+         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
+         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
+         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
+         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
+         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
+         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
+         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
+         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
+         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) // x (fastest) coordinate
+         {
+            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; // gather each pdf from the neighbour opposite its direction (x offsets appear inline as +/- _stride_pdfs_0)
+            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
+            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
+            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
+            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0; // scatter the gathered values into the temporary field at the current cell
+            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
+            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
+            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
+            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
+            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
+            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
+            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
+            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
+            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
+            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
+            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
+            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
+            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
+            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
+            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
+            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
+            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
+            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q19srt_kernel_streamOnlyNoAdvancement {
+// Pull-pattern streaming step for a D3Q19 lattice (generated kernel).
+// For every cell, each of the 19 PDF components is read from the neighbour
+// cell it streams in from and stored unchanged into `_data_pdfs_tmp` at the
+// current cell. No collision is applied and the source field is not
+// modified ("NoAdvancement": no field swap is implied by this kernel).
+//
+// Parameters:
+//   _data_pdfs         read-only source PDF field
+//   _data_pdfs_tmp     destination PDF field
+//   _size_pdfs_*       loop extents in x/y/z
+//   _stride_pdfs_*     element strides of the source field (x, y, z, f)
+//   _stride_pdfs_tmp_* element strides of the destination field (x, y, z, f)
+//
+// Naming convention of the generated temporaries: suffixes encode the
+// neighbour offset the pointer is pre-shifted by, e.g. `_2m1` = z-1 slice,
+// `_21` = z+1, `_1m1` = y-1, `_11` = y+1; a remaining x offset shows up as
+// `+/- _stride_pdfs_0` directly in the subscript of the innermost loop.
+static FUNC_PREFIX void d3q19srt_kernel_streamOnlyNoAdvancement(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
+   {
+      // Source base pointers, one per PDF direction, pre-offset to the
+      // z-slice that direction streams from (z-1, z, or z+1).
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      // Destination base pointers: all 19 f-components of the current z-slice
+      // (writes always target the current cell, hence no z offset).
+      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
+      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
+      {
+         // Source row pointers within the slice, pre-offset in y where the
+         // direction streams from y-1 (`_1m1`) or y+1 (`_11`).
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
+         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
+         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
+         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
+         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
+         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
+         // Destination row pointers (no y offset: writes go to the current cell).
+         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
+         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
+         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
+         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
+         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
+         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
+         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
+         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
+         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
+         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
+         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
+         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
+         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
+         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
+         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
+         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
+         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
+         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
+         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
+         {
+            // Gather: read each direction from its upstream neighbour; the
+            // remaining x offset (+/- _stride_pdfs_0) is applied in the index.
+            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
+            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
+            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
+            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            // Scatter: store all 19 streamed values at the current cell of the
+            // temporary field, leaving the source field unmodified.
+            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
+            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
+            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
+            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
+            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
+            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
+            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
+            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
+            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
+            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
+            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
+            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
+            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
+            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
+            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
+            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
+            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
+            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
+            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q19srt_kernel_initialise {
+// Initialises the D3Q19 PDF field from given density and velocity fields
+// (generated kernel). Each PDF is set to a polynomial in the local velocity
+// up to second order, scaled by the lattice weights that appear as literals
+// below (1/3 = 0.333..., 1/18 = 0.0555..., 1/36 = 0.0277...).
+// The PDFs are stored zero-centred: `delta_rho = rho - 1.0` is used in place
+// of the full density, so a quiescent unit-density fluid yields all-zero PDFs.
+// NOTE(review): this matches a second-order LB equilibrium as emitted by
+// lbmpy — confirm against the generator configuration if exact form matters.
+//
+// Parameters:
+//   _data_density   read-only scalar density field
+//   _data_pdfs      output PDF field (19 f-components)
+//   _data_velocity  read-only velocity field (3 f-components)
+//   _size_density_* loop extents; _stride_* element strides per field.
+static FUNC_PREFIX void d3q19srt_kernel_initialise(double * RESTRICT const _data_density, double * RESTRICT  _data_pdfs, double * RESTRICT const _data_velocity, int64_t const _size_density_0, int64_t const _size_density_1, int64_t const _size_density_2, int64_t const _stride_density_0, int64_t const _stride_density_1, int64_t const _stride_density_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
+   {
+      // Base pointers for the current z-slice of every accessed field component.
+      double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
+      double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
+      double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
+      double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
+      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
+      {
+         // Row pointers within the slice (no neighbour offsets: purely local).
+         double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
+         double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
+         double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
+         double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
+         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
+         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
+         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
+         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
+         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
+         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
+         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
+         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
+         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
+         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
+         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
+         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
+         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
+         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
+         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
+         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
+         {
+            // Load the cell-local macroscopic quantities.
+            const double rho = _data_density_20_30_10[_stride_density_0*ctr_0];
+            const double delta_rho = rho - 1.0;
+            const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0];
+            const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0];
+            const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0];
+            // Rest direction (weight 1/3), then the 6 axis-aligned directions
+            // (weight 1/18) and the 12 diagonal directions (weight 1/36).
+            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2);
+            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+         }
+      }
+   }
+}
+}
+
+
<unused13>+namespace internal_d3q19srt_kernel_getter {
+// Computes the macroscopic quantities from the D3Q19 PDF field (generated
+// kernel): per cell it sums the 19 PDFs into `delta_rho` and the signed
+// per-axis momentum density, then writes `rho = delta_rho + 1.0` to the
+// density field and the momentum density components directly to the
+// velocity field (zero-centred PDF storage; no division by rho is
+// performed, matching the generator's output for this storage format).
+//
+// Parameters:
+//   _data_density   output scalar density field
+//   _data_pdfs      read-only PDF field (19 f-components)
+//   _data_velocity  output velocity field (3 f-components)
+//   _size_density_* loop extents; _stride_* element strides per field.
+static FUNC_PREFIX void d3q19srt_kernel_getter(double * RESTRICT  _data_density, double * RESTRICT const _data_pdfs, double * RESTRICT  _data_velocity, int64_t const _size_density_0, int64_t const _size_density_1, int64_t const _size_density_2, int64_t const _stride_density_0, int64_t const _stride_density_1, int64_t const _stride_density_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
+   {
+      // Base pointers for all 19 PDF components and the three output fields
+      // in the current z-slice (all accesses are cell-local).
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
+      double * RESTRICT  _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
+      double * RESTRICT  _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
+      double * RESTRICT  _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
+      {
+         // Row pointers within the slice.
+         double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
+         double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
+         double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
+         double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
+         double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
+         double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
+         double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
+         double * RESTRICT  _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
+         double * RESTRICT  _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
+         double * RESTRICT  _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
+         double * RESTRICT  _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
+         {
+            // Partial sums: vel*Term groups the PDFs with a positive lattice
+            // velocity along the respective axis; the momdensity_* values
+            // subtract the opposing directions to form the signed first moment.
+            const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
+            const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
+            const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
+            const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
+            const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
+            // delta_rho is the zeroth moment: the sum of all 19 PDFs.
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
+            const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
+            // Undo the zero-centred storage offset to obtain the full density.
+            const double rho = delta_rho + 1.0;
+            const double u_0 = momdensity_0;
+            const double u_1 = momdensity_1;
+            const double u_2 = momdensity_2;
+            _data_density_20_30_10[_stride_density_0*ctr_0] = rho;
+            _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0;
+            _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1;
+            _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2;
+         }
+      }
+   }
+}
+}</unused13>
+
+
+
+
+
+// Runs the fused stream-collide kernel over the whole field, including
+// `ghost_layers` ghost layers on each side. Extracts raw data pointers,
+// iteration extents and element strides from the two GhostLayerFields and
+// forwards them (plus the SRT relaxation rate `omega`) to the generated
+// low-level kernel; results are written to `pdfs_tmp` (no field swap here).
+void D3Q19SRT::streamCollide( field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, double omega, const cell_idx_t ghost_layers )
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer shifted so index (0,0,0) addresses the first iterated (ghost) cell.
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   // Iteration extent per axis: interior size plus the ghost layers on both sides.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   // Element strides (x, y, z, f) of both fields, as expected by the kernel.
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q19srt_kernel_streamCollide::d3q19srt_kernel_streamCollide(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, omega);
+}
+// Same as streamCollide(), but restricted to the cells of the given
+// CellInterval `ci` instead of the full field (plus ghost layers).
+void D3Q19SRT::streamCollideCellInterval( field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, double omega, const CellInterval & ci)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer shifted so index (0,0,0) addresses the interval's minimum corner.
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Iteration extents come from the interval, not from the field size.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   // Element strides (x, y, z, f) of both fields, as expected by the kernel.
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q19srt_kernel_streamCollide::d3q19srt_kernel_streamCollide(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, omega);
+}
+
+// Collide-only sweep (raw-field variant): runs the generated D3Q19 SRT
+// collision kernel in place on `pdfs`, covering the interior plus
+// `ghost_layers` cells on every side. `omega` is the relaxation rate.
+void D3Q19SRT::collide( field::GhostLayerField<double, 19> * pdfs, double omega, const cell_idx_t ghost_layers )
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   // Base pointer shifted to the lowest processed cell (into the ghost layers).
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   // x/y/z/f linearisation strides of the field, forwarded to the flat kernel.
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   internal_d3q19srt_kernel_collide::d3q19srt_kernel_collide(_data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
+}
+// Collide-only sweep restricted to the cell interval `ci`. The interval may
+// reach into the ghost layers (asserted); sizes are taken from `ci`, not the field.
+void D3Q19SRT::collideCellInterval( field::GhostLayerField<double, 19> * pdfs, double omega, const CellInterval & ci)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Base pointer at the interval minimum corner.
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   internal_d3q19srt_kernel_collide::d3q19srt_kernel_collide(_data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
+}
+
+// Stream-only sweep (raw-field variant): runs the generated streaming kernel
+// with `pdfs` as source and `pdfs_tmp` as destination (per kernel naming —
+// kernel body not visible here). Strides of both fields are passed separately,
+// so the two fields need not share an allocation layout.
+void D3Q19SRT::stream( field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const cell_idx_t ghost_layers )
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   // Loop extents: interior of pdfs widened by ghost_layers on both sides.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q19srt_kernel_stream::d3q19srt_kernel_stream(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+// Stream-only sweep restricted to the cell interval `ci`; both fields must be
+// able to address the interval minimum (asserted against their ghost layers).
+void D3Q19SRT::streamCellInterval( field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const CellInterval & ci)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q19srt_kernel_stream::d3q19srt_kernel_stream(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+
+// Stream-only "no advancement" variant: same argument plumbing as stream(),
+// but dispatching to the generated streamOnlyNoAdvancement kernel.
+// NOTE(review): the exact semantic difference to stream() lives in the
+// generated kernel (presumably no timestep advancement) — confirm against the
+// lbmpy sweep-collection code generator.
+void D3Q19SRT::streamOnlyNoAdvancement( field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const cell_idx_t ghost_layers )
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q19srt_kernel_streamOnlyNoAdvancement::d3q19srt_kernel_streamOnlyNoAdvancement(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+// Cell-interval variant of streamOnlyNoAdvancement(); extents come from `ci`.
+void D3Q19SRT::streamOnlyNoAdvancementCellInterval( field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const CellInterval & ci)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q19srt_kernel_streamOnlyNoAdvancement::d3q19srt_kernel_streamOnlyNoAdvancement(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+
+// Initialises the pdf field from the macroscopic density and velocity fields.
+// Only the pdfs data pointer is non-const-qualified here, consistent with pdfs
+// being the output of the generated initialise kernel.
+void D3Q19SRT::initialise( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers )
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT const _data_density = density->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT const _data_velocity = velocity->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   // Loop extents are taken from the density field (all three fields are
+   // assumed to share the cell grid — TODO confirm against the generator).
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(density->xSize()) + 2*ghost_layers))
+   const int64_t _size_density_0 = int64_t(int64_c(density->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(density->ySize()) + 2*ghost_layers))
+   const int64_t _size_density_1 = int64_t(int64_c(density->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(density->zSize()) + 2*ghost_layers))
+   const int64_t _size_density_2 = int64_t(int64_c(density->zSize()) + 2*ghost_layers);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q19srt_kernel_initialise::d3q19srt_kernel_initialise(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+// Cell-interval variant of initialise(); extents come from `ci` and the
+// interval minimum must be addressable in all three fields (asserted).
+void D3Q19SRT::initialiseCellInterval( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT const _data_density = density->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT const _data_velocity = velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_density_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_density_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_density_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q19srt_kernel_initialise::d3q19srt_kernel_initialise(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+
+// Getter sweep: computes macroscopic density and velocity from the pdf field.
+// Here pdfs' data pointer is const-qualified while density and velocity are
+// writable — pdfs is the input, density/velocity the outputs.
+void D3Q19SRT::calculateMacroscopicParameters( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers )
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT  _data_density = density->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT  _data_velocity = velocity->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(density->xSize()) + 2*ghost_layers))
+   const int64_t _size_density_0 = int64_t(int64_c(density->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(density->ySize()) + 2*ghost_layers))
+   const int64_t _size_density_1 = int64_t(int64_c(density->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(density->zSize()) + 2*ghost_layers))
+   const int64_t _size_density_2 = int64_t(int64_c(density->zSize()) + 2*ghost_layers);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q19srt_kernel_getter::d3q19srt_kernel_getter(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+// Cell-interval variant of calculateMacroscopicParameters(); extents from `ci`.
+void D3Q19SRT::calculateMacroscopicParametersCellInterval( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci)
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT  _data_density = density->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT  _data_velocity = velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_density_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_density_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_density_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q19srt_kernel_getter::d3q19srt_kernel_getter(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+
+
+
+} // namespace lbm
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.h b/src/lbm_generated/sweep_collection/D3Q19SRT.h
new file mode 100644
index 0000000000000000000000000000000000000000..2fdb3850cb000daf544b265fa4ae3808253ddc00
--- /dev/null
+++ b/src/lbm_generated/sweep_collection/D3Q19SRT.h
@@ -0,0 +1,1131 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q19SRT.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+#include "core/Macros.h"
+
+
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/SwapableCompare.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <cmath>
+
+
+
+using namespace std::placeholders;
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#   pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+class D3Q19SRT
+{
+public:
+  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+
+   // Constructs the sweep collection.
+   //
+   // \param blocks      block storage the sweeps will run on
+   // \param pdfsID_     field ID of the 19-entry pdf field
+   // \param densityID_  field ID of the scalar density field
+   // \param velocityID_ field ID of the 3-component velocity field
+   // \param omega       SRT relaxation rate forwarded to the generated kernels
+   // \param outerWidth  thickness of the outer region for the INNER/OUTER split
+   //
+   // Aborts if any block is so small that the inner interval of the
+   // inner/outer split would be empty (block extent must exceed
+   // 2*outerWidth in every direction).
+   D3Q19SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
+     : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
+   {
+      for (auto& iBlock : *blocks)
+      {
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
+             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
+             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+            WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+      }
+   }
+
+   
+    // Releases the temporary pdf fields that the stream/collide wrappers
+    // create lazily and cache in cache_pdfs_ (owned as raw pointers).
+    // NOTE(review): the class relies on implicitly-generated copy operations
+    // while owning these raw pointers — copying a D3Q19SRT would double-delete
+    // the cached fields. Consider deleting copy ctor/assignment (Rule of
+    // Five); TODO confirm against the code generator before hand-editing.
+    ~D3Q19SRT() {  
+        for(auto p: cache_pdfs_) {
+            delete p;
+        }
+     }
+
+
+   /*************************************************************************************
+   *                Internal Function Definitions with raw Pointer
+   *************************************************************************************/
+   // Fused stream+collide over the whole field (plus optional ghost layers).
+   static void streamCollide (field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, double omega, const cell_idx_t ghost_layers = 0);
+   static void streamCollideCellInterval (field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, double omega, const CellInterval & ci);
+   
+   // Collision step only, in place on pdfs.
+   static void collide (field::GhostLayerField<double, 19> * pdfs, double omega, const cell_idx_t ghost_layers = 0);
+   static void collideCellInterval (field::GhostLayerField<double, 19> * pdfs, double omega, const CellInterval & ci);
+   
+   // Streaming step only, pdfs -> pdfs_tmp.
+   static void stream (field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const cell_idx_t ghost_layers = 0);
+   static void streamCellInterval (field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const CellInterval & ci);
+   
+   // Streaming variant generated without advancement (semantics defined by
+   // the lbmpy-generated kernel).
+   static void streamOnlyNoAdvancement (field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const cell_idx_t ghost_layers = 0);
+   static void streamOnlyNoAdvancementCellInterval (field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 19> * pdfs_tmp, const CellInterval & ci);
+   
+   // Sets pdfs from macroscopic density/velocity.
+   static void initialise (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers = 0);
+   static void initialiseCellInterval (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci);
+   
+   // Recovers density/velocity from pdfs.
+   static void calculateMacroscopicParameters (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers = 0);
+   static void calculateMacroscopicParametersCellInterval (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 19> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci);
+   
+
+   /*************************************************************************************
+   *                Function Definitions for external Usage
+   *************************************************************************************/
+
+   // Returns a sweep functor running the fused stream-collide on a full block.
+   // The lambda captures `this`, so it must not outlive this object.
+   std::function<void (IBlock *)> streamCollide()
+   {
+      return [this](IBlock* block) { streamCollide(block); };
+   }
+
+   // Returns a sweep functor for the requested region: INNER (interior only),
+   // OUTER (boundary slabs + pointer swap), or the full block (default/ALL).
+   std::function<void (IBlock *)> streamCollide(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamCollideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamCollideOuter(block); };
+         default:
+            return [this](IBlock* block) { streamCollide(block); };
+      }
+   }
+
+   // Same as above, but the full-block (default) sweep additionally covers
+   // `ghost_layers` ghost cells. Note: `ghost_layers` is intentionally ignored
+   // for INNER and OUTER, which derive their regions from outerWidth_.
+   std::function<void (IBlock *)> streamCollide(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamCollideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamCollideOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
+      }
+   }
+
+   
+
+   // Runs the fused stream-collide on the whole block (no ghost layers),
+   // then swaps the pdf data pointers so `pdfs` holds the updated values.
+   void streamCollide(IBlock * block)
+   {
+      const cell_idx_t ghost_layers = 0;
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp: lazily clone the source field
+          // once and cache it (keyed by the source field) for reuse.
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      auto & omega = this->omega_;
+      
+      streamCollide(pdfs, pdfs_tmp, omega, ghost_layers);
+      // Cheap O(1) swap of the underlying buffers; the temp keeps the old data.
+      pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+
+   // Same as streamCollide(IBlock*), but also processes `ghost_layers` ghost
+   // cells on every side of the block.
+   void streamCollide(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp (lazily cloned and cached).
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      auto & omega = this->omega_;
+      
+      streamCollide(pdfs, pdfs_tmp, omega, ghost_layers);
+      pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+
+   
+
+   // Runs the fused stream-collide only on the given cell interval, then swaps
+   // the pdf data pointers. Note the swap exchanges the WHOLE field, so cells
+   // outside `ci` end up with the (uninitialised or stale) temp-field contents.
+   void streamCollideCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp (lazily cloned and cached).
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      auto & omega = this->omega_;
+      
+      streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci);
+      pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+
+   // INNER part of the communication-hiding split: processes the block
+   // interior shrunk by outerWidth_ on every side. Deliberately does NOT swap
+   // the data pointers — streamCollideOuter() performs the swap after the
+   // outer region has been processed as well.
+   void streamCollideInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp (lazily cloned and cached).
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      auto & omega = this->omega_;
+      
+
+      // Interior interval: full xyz size shrunk by outerWidth_ on each side.
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamCollideCellInterval(pdfs, pdfs_tmp, omega, inner);
+   }
+
+   // OUTER part of the communication-hiding split: processes the six boundary
+   // slabs left out by streamCollideInner(), then swaps the pdf data pointers
+   // to complete the timestep for the whole block.
+   void streamCollideOuter(IBlock * block)
+   {
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp (lazily cloned and cached).
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      auto & omega = this->omega_;
+      
+
+      // Build the six outer slabs once and cache them in layers_. The N/S and
+      // E/W slabs are shrunk in z (and y) so cells shared with the already
+      // covered slabs are not processed twice.
+      if( layers_.empty() )
+      {
+         CellInterval ci;
+
+         pdfs->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    
+      for( auto & ci: layers_ )
+      {
+         streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci);
+      }
+    
+
+    // Swap only after inner AND outer regions were written to pdfs_tmp.
+    pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+   
+
+   //! Returns a sweep functor that runs the in-place collide step on the whole block.
+   std::function<void (IBlock *)> collide()
+   {
+      return [this](IBlock* block) { collide(block); };
+   }
+
+   //! Returns a sweep functor for the requested region: INNER, OUTER, or (default) the full block.
+   std::function<void (IBlock *)> collide(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { collideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { collideOuter(block); };
+         default:
+            return [this](IBlock* block) { collideOuter(block); };
+      }
+   }
+
+   //! As above with an explicit ghost-layer count for the full-block case.
+   //! NOTE(review): ghost_layers is deliberately unused for INNER/OUTER -- those
+   //! regions are derived from outerWidth_ instead.
+   std::function<void (IBlock *)> collide(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { collideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { collideOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
+      }
+   }
+
+   
+
+   //! In-place collide step over the whole block, without ghost layers.
+   //! No temporary field or pointer swap is needed because collision is local.
+   void collide(IBlock * block)
+   {
+      const cell_idx_t ghost_layers = 0;
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+      auto & omega = this->omega_;
+      
+      collide(pdfs, omega, ghost_layers);
+      
+   }
+
+   //! In-place collide step over the block including \p ghost_layers ghost layers.
+   void collide(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+      auto & omega = this->omega_;
+      
+      collide(pdfs, omega, ghost_layers);
+      
+   }
+
+   
+
+   //! In-place collide step restricted to the cells of \p ci.
+   void collideCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+      auto & omega = this->omega_;
+      
+      collideCellInterval(pdfs, omega, ci);
+      
+   }
+
+   //! Collide step on the interior only (block shrunk by outerWidth_ per direction).
+   void collideInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+      auto & omega = this->omega_;
+      
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      collideCellInterval(pdfs, omega, inner);
+   }
+
+   //! Collide step on the outer shell only (the cells skipped by collideInner()).
+   void collideOuter(IBlock * block)
+   {
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+
+      auto & omega = this->omega_;
+      
+
+      if( layers_.empty() )
+      {
+         // Lazily build the six boundary slabs; later slabs are shrunk in the
+         // directions already covered so no cell is processed twice.
+         CellInterval ci;
+
+         pdfs->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    
+      for( auto & ci: layers_ )
+      {
+         collideCellInterval(pdfs, omega, ci);
+      }
+    
+
+    
+   }
+   
+
+   //! Returns a sweep functor running the pure streaming step on the whole block.
+   std::function<void (IBlock *)> stream()
+   {
+      return [this](IBlock* block) { stream(block); };
+   }
+
+   //! Returns a streaming sweep functor for the requested region (INNER / OUTER / full block).
+   std::function<void (IBlock *)> stream(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOuter(block); };
+         default:
+            return [this](IBlock* block) { stream(block); };
+      }
+   }
+
+   //! As above with an explicit ghost-layer count for the full-block case.
+   //! NOTE(review): ghost_layers is deliberately unused for INNER/OUTER.
+   std::function<void (IBlock *)> stream(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
+      }
+   }
+
+   
+
+   //! Pure streaming step over the whole block (no ghost layers): streams the
+   //! pdfs into a cached temporary field and then swaps the data pointers.
+   void stream(IBlock * block)
+   {
+      const cell_idx_t ghost_layers = 0;
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          // (cloned once per source field and reused via cache_pdfs_)
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+      stream(pdfs, pdfs_tmp, ghost_layers);
+      pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+
+   //! Pure streaming step including \p ghost_layers ghost layers, with pointer swap.
+   void stream(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+      stream(pdfs, pdfs_tmp, ghost_layers);
+      pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+
+   
+
+   //! Streaming step restricted to the cells of \p ci, with pointer swap.
+   void streamCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+      streamCellInterval(pdfs, pdfs_tmp, ci);
+      pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+
+   //! Streaming step on the interior of the block only (shrunk by outerWidth_).
+   //! Note: intentionally NO pointer swap here -- streamOuter() performs the swap
+   //! once both parts have been processed.
+   void streamInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamCellInterval(pdfs, pdfs_tmp, inner);
+   }
+
+   //! Streaming step on the outer shell of the block, followed by the pointer
+   //! swap for the whole field (completing a streamInner()/streamOuter() pair).
+   void streamOuter(IBlock * block)
+   {
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+
+      if( layers_.empty() )
+      {
+         // Lazily build the six boundary slabs; later slabs are shrunk in the
+         // directions already covered so no cell is processed twice.
+         CellInterval ci;
+
+         pdfs->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    
+      for( auto & ci: layers_ )
+      {
+         streamCellInterval(pdfs, pdfs_tmp, ci);
+      }
+    
+
+    pdfs->swapDataPointers(pdfs_tmp);
+
+   }
+   
+
+   //! Returns a sweep functor streaming into the temporary field WITHOUT a
+   //! pointer swap, i.e. the visible pdf field is not advanced.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement()
+   {
+      return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+   }
+
+   //! Region-selecting variant (INNER / OUTER / full block) of the no-advancement stream.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+         default:
+            return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+      }
+   }
+
+   //! As above with an explicit ghost-layer count for the full-block case.
+   //! NOTE(review): ghost_layers is deliberately unused for INNER/OUTER.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
+      }
+   }
+
+   
+
+   //! Streams the pdfs into the cached temporary field over the whole block
+   //! (no ghost layers). No swapDataPointers() call: the source pdf field keeps
+   //! its pre-stream values, hence "no advancement".
+   void streamOnlyNoAdvancement(IBlock * block)
+   {
+      const cell_idx_t ghost_layers = 0;
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          // (cloned once per source field and reused via cache_pdfs_)
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+      streamOnlyNoAdvancement(pdfs, pdfs_tmp, ghost_layers);
+      
+   }
+
+   //! No-advancement streaming including \p ghost_layers ghost layers.
+   void streamOnlyNoAdvancement(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+      streamOnlyNoAdvancement(pdfs, pdfs_tmp, ghost_layers);
+      
+   }
+
+   
+
+   //! No-advancement streaming restricted to the cells of \p ci.
+   void streamOnlyNoAdvancementCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+      streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
+      
+   }
+
+   //! No-advancement streaming on the interior only (shrunk by outerWidth_).
+   void streamOnlyNoAdvancementInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, inner);
+   }
+
+   //! No-advancement streaming on the outer shell only. Consistent with the
+   //! "no advancement" contract, there is no pointer swap here either.
+   void streamOnlyNoAdvancementOuter(IBlock * block)
+   {
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      field::GhostLayerField<double, 19> * pdfs_tmp;
+      {
+          // Getting temporary field pdfs_tmp
+          auto it = cache_pdfs_.find( pdfs );
+          if( it != cache_pdfs_.end() )
+          {
+              pdfs_tmp = *it;
+          }
+          else
+          {
+              pdfs_tmp = pdfs->cloneUninitialized();
+              cache_pdfs_.insert(pdfs_tmp);
+          }
+      }
+
+      
+      
+
+      if( layers_.empty() )
+      {
+         // Lazily build the six boundary slabs; later slabs are shrunk in the
+         // directions already covered so no cell is processed twice.
+         CellInterval ci;
+
+         pdfs->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         pdfs->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         pdfs->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    
+      for( auto & ci: layers_ )
+      {
+         streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
+      }
+    
+
+    
+   }
+   
+
+   //! Returns a sweep functor initialising the pdf field from the density and
+   //! velocity fields on the whole block.
+   std::function<void (IBlock *)> initialise()
+   {
+      return [this](IBlock* block) { initialise(block); };
+   }
+
+   //! Region-selecting variant (INNER / OUTER / full block) of the initialiser.
+   std::function<void (IBlock *)> initialise(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { initialiseInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { initialiseOuter(block); };
+         default:
+            return [this](IBlock* block) { initialise(block); };
+      }
+   }
+
+   //! As above with an explicit ghost-layer count for the full-block case.
+   //! NOTE(review): ghost_layers is deliberately unused for INNER/OUTER.
+   std::function<void (IBlock *)> initialise(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { initialiseInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { initialiseOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
+      }
+   }
+
+   
+
+   //! Initialises the pdf field from the density and velocity fields over the
+   //! whole block, without ghost layers (presumably to the matching equilibrium
+   //! -- see the generated kernel for the exact expression).
+   void initialise(IBlock * block)
+   {
+      const cell_idx_t ghost_layers = 0;
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+      initialise(density, pdfs, velocity, ghost_layers);
+      
+   }
+
+   //! Initialisation including \p ghost_layers ghost layers.
+   void initialise(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+      initialise(density, pdfs, velocity, ghost_layers);
+      
+   }
+
+   
+
+   //! Initialisation restricted to the cells of \p ci.
+   void initialiseCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+      initialiseCellInterval(density, pdfs, velocity, ci);
+      
+   }
+
+   //! Initialisation of the interior only (shrunk by outerWidth_ per direction).
+   void initialiseInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+
+      CellInterval inner = density->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      initialiseCellInterval(density, pdfs, velocity, inner);
+   }
+
+   //! Initialisation of the outer shell only (the cells initialiseInner() skipped).
+   void initialiseOuter(IBlock * block)
+   {
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+
+      if( layers_.empty() )
+      {
+         // Lazily build the six boundary slabs; later slabs are shrunk in the
+         // directions already covered so no cell is processed twice.
+         CellInterval ci;
+
+         density->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         density->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         density->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         density->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         density->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         density->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    
+      for( auto & ci: layers_ )
+      {
+         initialiseCellInterval(density, pdfs, velocity, ci);
+      }
+    
+
+    
+   }
+   
+
+   //! Returns a sweep functor deriving the density and velocity fields from the
+   //! pdf field on the whole block.
+   std::function<void (IBlock *)> calculateMacroscopicParameters()
+   {
+      return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+   }
+
+   //! Region-selecting variant (INNER / OUTER / full block).
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+         default:
+            return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+      }
+   }
+
+   //! As above with an explicit ghost-layer count for the full-block case.
+   //! NOTE(review): ghost_layers is deliberately unused for INNER/OUTER.
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
+      }
+   }
+
+   
+
+   //! Computes the macroscopic quantities (density, velocity fields) from the
+   //! pdf field over the whole block, without ghost layers.
+   void calculateMacroscopicParameters(IBlock * block)
+   {
+      const cell_idx_t ghost_layers = 0;
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+      calculateMacroscopicParameters(density, pdfs, velocity, ghost_layers);
+      
+   }
+
+   //! Macroscopic-quantity computation including \p ghost_layers ghost layers.
+   void calculateMacroscopicParameters(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+      calculateMacroscopicParameters(density, pdfs, velocity, ghost_layers);
+      
+   }
+
+   
+
+   //! Macroscopic-quantity computation restricted to the cells of \p ci.
+   void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+      calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
+      
+   }
+
+   //! Macroscopic-quantity computation on the interior only (shrunk by outerWidth_).
+   void calculateMacroscopicParametersInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+
+      CellInterval inner = density->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      calculateMacroscopicParametersCellInterval(density, pdfs, velocity, inner);
+   }
+
+   //! Macroscopic-quantity computation on the outer shell only.
+   void calculateMacroscopicParametersOuter(IBlock * block)
+   {
+
+      auto pdfs = block->getData< field::GhostLayerField<double, 19> >(pdfsID);
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      
+      
+
+      if( layers_.empty() )
+      {
+         // Lazily build the six boundary slabs; later slabs are shrunk in the
+         // directions already covered so no cell is processed twice.
+         CellInterval ci;
+
+         density->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         density->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         density->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         density->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         density->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         density->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+    
+      for( auto & ci: layers_ )
+      {
+         calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
+      }
+    
+
+    
+   }
+   
+
+   
+
+   private:
+      shared_ptr< StructuredBlockStorage > blocks_;
+      // Block data IDs of the fields this sweep collection operates on.
+      BlockDataID pdfsID;
+    BlockDataID densityID;
+    BlockDataID velocityID;
+    // SRT relaxation rate passed to the generated kernels.
+    double omega_;
+
+    // Cached scratch pdf fields (one per source field, created via
+    // cloneUninitialized()), ordered by field::SwapableCompare.
+    private: std::set< field::GhostLayerField<double, 19> *, field::SwapableCompare< field::GhostLayerField<double, 19> * > > cache_pdfs_;
+
+      // Width of the outer layer handled by the *Outer sweeps; the *Inner
+      // sweeps shrink the processed interval by this amount per direction.
+      Cell outerWidth_;
+      // Lazily built outer boundary slabs, filled on first *Outer call.
+      std::vector<CellInterval> layers_;
+
+      
+};
+
+
+} // namespace lbm
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce89749fc60ab603f3172992cb46c65242e57d16
--- /dev/null
+++ b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
@@ -0,0 +1,1220 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q27SRT.cpp
+//! \author pystencils
+//======================================================================================================================
+#include "D3Q27SRT.h"
+
+#define FUNC_PREFIX
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable :  1599 )
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace lbm {
+
+
+namespace internal_d3q27srt_kernel_streamCollide {
+// Fused stream-and-collide kernel for the D3Q27 lattice with SRT (BGK) collision,
+// auto-generated by pystencils/lbmpy -- do not edit by hand.
+// Pull scheme: each cell gathers the 27 populations from its neighbors (the
+// z-neighbor is encoded in the pointer-name infix _2m1/_20/_21 = z-1/z/z+1, the
+// y-neighbor in the suffix _1m1/_10/_11 = y-1/y/y+1, and the x-neighbor as an
+// explicit +/- _stride_pdfs_0 offset on the index), relaxes them towards
+// equilibrium with rate `omega`, and writes the post-collision values for the
+// current cell into the separate destination field `_data_pdfs_tmp`.
+static FUNC_PREFIX void d3q27srt_kernel_streamCollide(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, double omega)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
+   {
+      // Per-z-slice base pointers: _data_pdfs_<z>_3<q> addresses PDF component q
+      // in slice z-1 (2m1), z (20), or z+1 (21); hoisted out of the inner loops.
+      double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      // Destination-field base pointers: all 27 components of the current z-slice.
+      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
+      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
+      {
+         // Per-row pointers with the y-neighbor offset folded in
+         // (suffix _1m1 = row y-1, _10 = row y, _11 = row y+1).
+         double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
+         double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
+         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
+         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
+         double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
+         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
+         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
+         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
+         double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
+         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
+         double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
+         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
+         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
+         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
+         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
+         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
+         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
+         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
+         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
+         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
+         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
+         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
+         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
+         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
+         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
+         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
+         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
+         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
+         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
+         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
+         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
+         double * RESTRICT  _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
+         double * RESTRICT  _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
+         double * RESTRICT  _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
+         double * RESTRICT  _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
+         double * RESTRICT  _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
+         double * RESTRICT  _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
+         double * RESTRICT  _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
+         double * RESTRICT  _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
+         {
+            // Partial momentum sums over the streamed-in populations with positive
+            // lattice velocity in x (vel0Term), y (vel1Term), and z (vel2Term).
+            const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            // delta_rho: sum of all 27 streamed-in populations (density deviation).
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            // u_i: first-order moments (velocity components) = sum of populations
+            // weighted by the sign of their lattice velocity in direction i.
+            const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            // Velocity sums/differences reused by the diagonal equilibrium terms.
+            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double u0Pu1 = u_0 + u_1;
+            const double u1Pu2 = u_1 + u_2;
+            const double u1Mu2 = u_1 + u_2*-1.0;
+            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u0Pu2 = u_0 + u_2;
+            // Common part of the equilibrium distribution shared by all directions.
+            const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
+            // SRT/BGK relaxation per direction: f_tmp = f + omega*(f_eq - f).
+            // The decimal factors are the D3Q27 lattice weights 8/27, 2/27, 1/54,
+            // 1/216 and their products with the equilibrium expansion coefficients.
+            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
+            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q27srt_kernel_collide {
+static FUNC_PREFIX void d3q27srt_kernel_collide(double * RESTRICT  _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double omega)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
+   {
+      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
+      {
+         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
+         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
+         double * RESTRICT  _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
+         double * RESTRICT  _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
+         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
+         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
+         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
+         double * RESTRICT  _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
+         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
+         double * RESTRICT  _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
+         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
+         double * RESTRICT  _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
+         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
+         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
+         double * RESTRICT  _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
+         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
+         double * RESTRICT  _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
+         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
+         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
+         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
+         double * RESTRICT  _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
+         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
+         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
+         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
+         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
+         {
+            const double xi_1 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
+            const double xi_2 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
+            const double xi_3 = _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0];
+            const double xi_4 = _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0];
+            const double xi_5 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
+            const double xi_6 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
+            const double xi_7 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
+            const double xi_8 = _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0];
+            const double xi_9 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
+            const double xi_10 = _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0];
+            const double xi_11 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
+            const double xi_12 = _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0];
+            const double xi_13 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
+            const double xi_14 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
+            const double xi_15 = _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0];
+            const double xi_16 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
+            const double xi_17 = _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0];
+            const double xi_18 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
+            const double xi_19 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
+            const double xi_20 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            const double xi_21 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
+            const double xi_22 = _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0];
+            const double xi_23 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
+            const double xi_24 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
+            const double xi_25 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
+            const double xi_26 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
+            const double xi_27 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
+            const double vel0Term = xi_12 + xi_14 + xi_15 + xi_24 + xi_25 + xi_26 + xi_4 + xi_5 + xi_8;
+            const double vel1Term = xi_10 + xi_11 + xi_13 + xi_17 + xi_21 + xi_9;
+            const double vel2Term = xi_1 + xi_19 + xi_22 + xi_7;
+            const double delta_rho = vel0Term + vel1Term + vel2Term + xi_16 + xi_18 + xi_2 + xi_20 + xi_23 + xi_27 + xi_3 + xi_6;
+            const double u_0 = vel0Term + xi_1*-1.0 + xi_10*-1.0 + xi_11*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_23*-1.0 + xi_27*-1.0 + xi_3*-1.0;
+            const double u_1 = vel1Term + xi_12 + xi_14 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_3*-1.0 + xi_4 + xi_5*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0;
+            const double u_2 = vel2Term + xi_10*-1.0 + xi_12 + xi_15*-1.0 + xi_17 + xi_2*-1.0 + xi_21*-1.0 + xi_23*-1.0 + xi_24 + xi_25*-1.0 + xi_3*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_8 + xi_9;
+            const double u0Mu1 = u_0 + u_1*-1.0;
+            const double u0Pu1 = u_0 + u_1;
+            const double u1Pu2 = u_1 + u_2;
+            const double u1Mu2 = u_1 + u_2*-1.0;
+            const double u0Mu2 = u_0 + u_2*-1.0;
+            const double u0Pu2 = u_0 + u_2;
+            const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
+            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 + xi_20*-1.0) + xi_20;
+            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + xi_13*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_13;
+            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + xi_16*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_16;
+            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + xi_27*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_27;
+            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + xi_26*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_26;
+            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + xi_19*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_19;
+            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + xi_2*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_2;
+            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + xi_11*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_11;
+            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + xi_14*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_14;
+            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + xi_18*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_18;
+            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + xi_5*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_5;
+            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + xi_9*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9;
+            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + xi_7*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7;
+            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + xi_1*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_1;
+            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + xi_24*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_24;
+            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + xi_21*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_21;
+            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + xi_6*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_6;
+            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + xi_23*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_23;
+            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + xi_25*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_25;
+            _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_12*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12;
+            _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_17*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_17;
+            _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_8*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_8;
+            _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_22*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_22;
+            _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_4*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_4;
+            _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_10*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_10;
+            _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_15*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_15;
+            _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_3*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_3;
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q27srt_kernel_stream {
+// Pull-pattern streaming step for a D3Q27 lattice Boltzmann PDF field.
+// For every cell in the iteration space, each of the 27 PDF components is
+// read from the neighbouring cell that lies one lattice step upstream along
+// that component's direction and written to the same component of the
+// current cell in the temporary field _data_pdfs_tmp.
+//
+// Pointer-name convention (visible in the offset arithmetic below):
+//   _2m1 / _21 : z-slice at ctr_2 - 1 / ctr_2 + 1  (dimension-2 offset)
+//   _1m1 / _11 : row at ctr_1 - 1 / ctr_1 + 1      (dimension-1 offset)
+//   _3<q>      : PDF component q, i.e. base + q * _stride_pdfs_3
+// x-direction offsets are applied directly in the innermost subscripts
+// (+/- _stride_pdfs_0).
+//
+// NOTE(review): reads reach one cell outside [0, _size) in every dimension,
+// so the caller must provide valid neighbour data (ghost layers); no bounds
+// checks are performed here.
+// NOTE(review): this appears to be pystencils/lbmpy-generated code -- do not
+// edit by hand; changes would be lost on regeneration. Confirm against the
+// code generation scripts before modifying.
+static FUNC_PREFIX void d3q27srt_kernel_stream(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
+   {
+      // Source pointers: one per PDF component, pre-offset in z to the
+      // upstream neighbour slice of this z-slab where the direction has a
+      // z-component (_2m1 / _21), otherwise the current slice (_20).
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
+      // Destination pointers into the temporary field: always the current
+      // z-slice (no neighbour offsets on the write side of a pull stream).
+      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
+      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
+      {
+         // Row pointers: the _1m1 / _11 suffix applies the per-direction
+         // y-offset (row ctr_1 - 1 / ctr_1 + 1) on top of the z-offset above.
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
+         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
+         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
+         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
+         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
+         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
+         double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
+         double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
+         double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
+         double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
+         double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
+         double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
+         double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
+         double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
+         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
+         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
+         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
+         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
+         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
+         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
+         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
+         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
+         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
+         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
+         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
+         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
+         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
+         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
+         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
+         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
+         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
+         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
+         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
+         double * RESTRICT  _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
+         double * RESTRICT  _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
+         double * RESTRICT  _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
+         double * RESTRICT  _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
+         double * RESTRICT  _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
+         double * RESTRICT  _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
+         double * RESTRICT  _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
+         double * RESTRICT  _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
+         {
+            // Gather: read each PDF from its upstream neighbour; the
+            // +/- _stride_pdfs_0 terms supply the per-direction x-offset.
+            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
+            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
+            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
+            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            // Scatter: write all 27 values into the current cell of the
+            // temporary field (component order unchanged).
+            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
+            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
+            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
+            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
+            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
+            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
+            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
+            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
+            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
+            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
+            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
+            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
+            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
+            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
+            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
+            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
+            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
+            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
+            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+            _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19;
+            _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20;
+            _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21;
+            _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22;
+            _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23;
+            _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24;
+            _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25;
+            _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26;
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q27srt_kernel_streamOnlyNoAdvancement {
+static FUNC_PREFIX void d3q27srt_kernel_streamOnlyNoAdvancement(double * RESTRICT const _data_pdfs, double * RESTRICT  _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
+   {
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
+      double * RESTRICT  _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
+      double * RESTRICT  _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
+         double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
+         double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
+         double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
+         double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
+         double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
+         double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
+         double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
+         double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
+         double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
+         double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
+         double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
+         double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
+         double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
+         double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
+         double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
+         double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
+         double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
+         double * RESTRICT  _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
+         double * RESTRICT  _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
+         double * RESTRICT  _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
+         double * RESTRICT  _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
+         double * RESTRICT  _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
+         double * RESTRICT  _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
+         double * RESTRICT  _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
+         double * RESTRICT  _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
+         double * RESTRICT  _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
+         double * RESTRICT  _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
+         double * RESTRICT  _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
+         double * RESTRICT  _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
+         double * RESTRICT  _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
+         double * RESTRICT  _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
+         double * RESTRICT  _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
+         double * RESTRICT  _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
+         double * RESTRICT  _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
+         double * RESTRICT  _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
+         double * RESTRICT  _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
+         double * RESTRICT  _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
+         double * RESTRICT  _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
+         double * RESTRICT  _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
+         double * RESTRICT  _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
+         double * RESTRICT  _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
+         double * RESTRICT  _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
+         double * RESTRICT  _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
+         double * RESTRICT  _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
+         {
+            const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
+            const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
+            const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
+            const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
+            const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
+            const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
+            const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
+            const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+            const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
+            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
+            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
+            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
+            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
+            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
+            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
+            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
+            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
+            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
+            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
+            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
+            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
+            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
+            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
+            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
+            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
+            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
+            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+            _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19;
+            _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20;
+            _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21;
+            _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22;
+            _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23;
+            _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24;
+            _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25;
+            _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26;
+         }
+      }
+   }
+}
+}
+
+
+// Auto-generated kernel (pystencils/lbmpy code generation) — do not edit by hand.
+//
+// Initialises all 27 PDF components of a D3Q27 SRT lattice Boltzmann field to
+// the second-order equilibrium distribution computed from the given density
+// and velocity fields.
+//
+// PDFs are stored zero-centred: the constant lattice-weight offset is omitted,
+// so for rho == 1.0 and u == (0,0,0) every written entry is exactly 0.0.
+// The numeric factors below are the D3Q27 lattice weights:
+//   8/27  ~ 0.296296...   (rest direction f0)
+//   2/27  ~ 0.0740740...  (face neighbours f1..f6)
+//   1/54  ~ 0.0185185...  (edge neighbours f7..f18)
+//   1/216 ~ 0.0046296...  (corner neighbours f19..f26)
+//
+// Parameters:
+//   _data_density, _data_velocity : read-only input fields (const pointees)
+//   _data_pdfs                    : output PDF field (written, hence non-const)
+//   _size_density_{0,1,2}         : iteration extents in x/y/z
+//   _stride_*_{0,1,2,3}           : element strides per x/y/z/f index of each field
+namespace internal_d3q27srt_kernel_initialise {
+static FUNC_PREFIX void d3q27srt_kernel_initialise(double * RESTRICT const _data_density, double * RESTRICT  _data_pdfs, double * RESTRICT const _data_velocity, int64_t const _size_density_0, int64_t const _size_density_1, int64_t const _size_density_2, int64_t const _stride_density_0, int64_t const _stride_density_1, int64_t const _stride_density_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3)
+{
+   // Outer loop over z; hoist per-z-slice base pointers for every field
+   // component so the inner loops only add the y/x stride offsets.
+   for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
+   {
+      double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
+      double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
+      double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
+      double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
+      // One base pointer per PDF direction f = 0..26 (offset f*_stride_pdfs_3).
+      double * RESTRICT  _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT  _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
+      double * RESTRICT  _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
+      // Middle loop over y; hoist per-row base pointers.
+      for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
+         double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
+         double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
+         double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
+         double * RESTRICT  _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT  _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
+         double * RESTRICT  _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
+         double * RESTRICT  _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT  _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT  _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
+         double * RESTRICT  _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
+         double * RESTRICT  _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
+         double * RESTRICT  _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
+         double * RESTRICT  _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
+         double * RESTRICT  _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
+         double * RESTRICT  _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
+         double * RESTRICT  _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
+         double * RESTRICT  _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
+         double * RESTRICT  _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
+         double * RESTRICT  _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
+         double * RESTRICT  _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
+         double * RESTRICT  _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
+         double * RESTRICT  _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
+         double * RESTRICT  _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
+         double * RESTRICT  _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
+         double * RESTRICT  _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
+         double * RESTRICT  _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
+         double * RESTRICT  _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
+         double * RESTRICT  _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
+         double * RESTRICT  _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
+         double * RESTRICT  _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
+         // Innermost loop over x: read macroscopic values, write equilibrium PDFs.
+         for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
+         {
+            // Macroscopic inputs for this cell.
+            const double rho = _data_density_20_30_10[_stride_density_0*ctr_0];
+            // Deviation from the reference density 1.0 (zero-centred storage).
+            const double delta_rho = rho - 1.0;
+            const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0];
+            const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0];
+            const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0];
+            // Equilibrium per direction: w_i*delta_rho + linear (c_i . u) term
+            // + quadratic velocity terms; all coefficients pre-multiplied at codegen time.
+            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2);
+            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
+            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
+            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
+            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
+            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
+            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
+            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+            _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+            _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+         }
+      }
+   }
+}
+}
+
+
+namespace internal_d3q27srt_kernel_getter {
+static FUNC_PREFIX void d3q27srt_kernel_getter(double * RESTRICT  _data_density, double * RESTRICT const _data_pdfs, double * RESTRICT  _data_velocity, int64_t const _size_density_0, int64_t const _size_density_1, int64_t const _size_density_2, int64_t const _stride_density_0, int64_t const _stride_density_1, int64_t const _stride_density_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3)
+{
+   for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
+   {
+      double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
+      double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      double * RESTRICT  _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
+      double * RESTRICT  _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
+      double * RESTRICT  _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
+      double * RESTRICT  _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
+      for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
+      {
+         double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
+         double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
+         double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
+         double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
+         double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
+         double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
+         double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
+         double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
+         double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
+         double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
+         double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
+         double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
+         double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
+         double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
+         double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
+         double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
+         double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
+         double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
+         double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
+         double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
+         double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
+         double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
+         double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
+         double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
+         double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
+         double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
+         double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
+         double * RESTRICT  _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
+         double * RESTRICT  _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
+         double * RESTRICT  _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
+         double * RESTRICT  _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
+         for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
+         {
+            const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
+            const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
+            const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
+            const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
+            const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
+            const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
+            const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0];
+            const double rho = delta_rho + 1.0;
+            const double u_0 = momdensity_0;
+            const double u_1 = momdensity_1;
+            const double u_2 = momdensity_2;
+            _data_density_20_30_10[_stride_density_0*ctr_0] = rho;
+            _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0;
+            _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1;
+            _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2;
+         }
+      }
+   }
+}
+}
+
+
+
+
+
+void D3Q27SRT::streamCollide( field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, double omega, const cell_idx_t ghost_layers ) // Runs the generated streamCollide kernel over the whole field plus ghost_layers cells per side; pdfs is read (const pointer), pdfs_tmp is written.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers())) // requested rim must fit into the allocated ghost layers
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // base pointer shifted to the ghost-layer origin
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // non-const: destination field
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // iteration extent per axis = interior size + rim on both sides
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // element strides so the kernel can address arbitrary field layouts
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // stride of the f (direction) index
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q27srt_kernel_streamCollide::d3q27srt_kernel_streamCollide(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, omega); // all per-cell work happens in the generated kernel
+}
+void D3Q27SRT::streamCollideCellInterval( field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, double omega, const CellInterval & ci) // Same as streamCollide(), but iterates only over the cells of ci.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci may start inside the ghost rim, but not beyond it
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // base pointer at the interval's lower corner
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // NOTE(review): checks extent only, not ci's offset — generated as-is
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q27srt_kernel_streamCollide::d3q27srt_kernel_streamCollide(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, omega);
+}
+
+void D3Q27SRT::collide( field::GhostLayerField<double, 27> * pdfs, double omega, const cell_idx_t ghost_layers ) // Runs the generated collide kernel over the field plus ghost_layers cells per side; pdfs is passed via a non-const pointer, so it is updated in place.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // non-const: single field serves as both source and destination
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f (direction) stride
+   internal_d3q27srt_kernel_collide::d3q27srt_kernel_collide(_data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega); // omega is the relaxation parameter forwarded to the kernel
+}
+void D3Q27SRT::collideCellInterval( field::GhostLayerField<double, 27> * pdfs, double omega, const CellInterval & ci) // CellInterval variant of collide(): in-place update restricted to the cells of ci.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // base pointer at the interval's lower corner
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   internal_d3q27srt_kernel_collide::d3q27srt_kernel_collide(_data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
+}
+
+void D3Q27SRT::stream( field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const cell_idx_t ghost_layers ) // Runs the generated stream kernel: pdfs is read (const pointer), pdfs_tmp is written; no omega — this step takes no relaxation parameter.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // base pointer shifted to the ghost-layer origin
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers); // extent = interior + rim on both sides
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q27srt_kernel_stream::d3q27srt_kernel_stream(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+void D3Q27SRT::streamCellInterval( field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const CellInterval & ci) // CellInterval variant of stream(): iterate only over the cells of ci.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extents come from the interval, offsets are folded into the base pointers above
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q27srt_kernel_stream::d3q27srt_kernel_stream(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+
+void D3Q27SRT::streamOnlyNoAdvancement( field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const cell_idx_t ghost_layers ) // Runs the generated streamOnlyNoAdvancement kernel: pdfs read (const), pdfs_tmp written. Presumably streams without swapping/advancing the fields afterwards — semantics live in the generated kernel; confirm against the generator.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(pdfs->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(pdfs->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(pdfs->zSize()) + 2*ghost_layers);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q27srt_kernel_streamOnlyNoAdvancement::d3q27srt_kernel_streamOnlyNoAdvancement(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+void D3Q27SRT::streamOnlyNoAdvancementCellInterval( field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const CellInterval & ci) // CellInterval variant of streamOnlyNoAdvancement(): iterate only over the cells of ci.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs_tmp->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
+   const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
+   const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
+   const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
+   internal_d3q27srt_kernel_streamOnlyNoAdvancement::d3q27srt_kernel_streamOnlyNoAdvancement(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
+}
+
+void D3Q27SRT::initialise( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers ) // Runs the generated initialise kernel: density and velocity are read (const pointers), pdfs is written. Iteration extents are taken from the density field.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT const _data_density = density->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // input: scalar density field
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // output: 27-entry pdf field
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT const _data_velocity = velocity->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // input: 3-component velocity field
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(density->xSize()) + 2*ghost_layers))
+   const int64_t _size_density_0 = int64_t(int64_c(density->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(density->ySize()) + 2*ghost_layers))
+   const int64_t _size_density_1 = int64_t(int64_c(density->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(density->zSize()) + 2*ghost_layers))
+   const int64_t _size_density_2 = int64_t(int64_c(density->zSize()) + 2*ghost_layers);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q27srt_kernel_initialise::d3q27srt_kernel_initialise(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+void D3Q27SRT::initialiseCellInterval( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci) // CellInterval variant of initialise(): writes pdfs from density/velocity only inside ci.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT const _data_density = density->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT  _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // output field
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT const _data_velocity = velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_density_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_density_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_density_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q27srt_kernel_initialise::d3q27srt_kernel_initialise(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+
+void D3Q27SRT::calculateMacroscopicParameters( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers ) // Runs the generated getter kernel: pdfs is read (const pointer), density and velocity are written. Inverse of initialise().
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT  _data_density = density->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // output: per-cell density
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // input: pdf field
+   WALBERLA_ASSERT_GREATER_EQUAL(-ghost_layers, -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT  _data_velocity = velocity->dataAt(-ghost_layers, -ghost_layers, -ghost_layers, 0); // output: per-cell velocity
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(density->xSize()) + 2*ghost_layers))
+   const int64_t _size_density_0 = int64_t(int64_c(density->xSize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(density->ySize()) + 2*ghost_layers))
+   const int64_t _size_density_1 = int64_t(int64_c(density->ySize()) + 2*ghost_layers);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(density->zSize()) + 2*ghost_layers))
+   const int64_t _size_density_2 = int64_t(int64_c(density->zSize()) + 2*ghost_layers);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q27srt_kernel_getter::d3q27srt_kernel_getter(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+void D3Q27SRT::calculateMacroscopicParametersCellInterval( field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci) // CellInterval variant of calculateMacroscopicParameters(): reads pdfs, writes density/velocity only inside ci.
+{
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(density->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(density->nrOfGhostLayers()))
+   double * RESTRICT  _data_density = density->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // output field
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double * RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // input field
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(velocity->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(velocity->nrOfGhostLayers()))
+   double * RESTRICT  _data_velocity = velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // output field
+   WALBERLA_ASSERT_GREATER_EQUAL(density->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_density_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_density_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(density->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_density_2 = int64_t(int64_c(ci.zSize()) + 0);
+   const int64_t _stride_density_0 = int64_t(density->xStride());
+   const int64_t _stride_density_1 = int64_t(density->yStride());
+   const int64_t _stride_density_2 = int64_t(density->zStride());
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
+   const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
+   const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
+   const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
+   internal_d3q27srt_kernel_getter::d3q27srt_kernel_getter(_data_density, _data_pdfs, _data_velocity, _size_density_0, _size_density_1, _size_density_2, _stride_density_0, _stride_density_1, _stride_density_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
+}
+
+
+
+} // namespace lbm
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
\ No newline at end of file
diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.h b/src/lbm_generated/sweep_collection/D3Q27SRT.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb45b71660fbf902d16cd064e2f09dadf24548d7
--- /dev/null
+++ b/src/lbm_generated/sweep_collection/D3Q27SRT.h
@@ -0,0 +1,1131 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file D3Q27SRT.h
+//! \author pystencils
+//======================================================================================================================
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+#include "core/Macros.h"
+
+
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/SwapableCompare.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <cmath>
+
+
+
+using namespace std::placeholders;
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#   pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace lbm {
+
+
+/***********************************************************************************
+ * Generated sweep collection for a D3Q27 lattice Boltzmann method with an SRT
+ * collision operator.
+ *
+ * The raw compute kernels are declared as static member functions below and are
+ * defined in D3Q27SRT.cpp. This class wraps them with per-block entry points and
+ * std::function factories that can run on the whole block (Type::ALL) or split
+ * into Type::INNER / Type::OUTER parts for communication hiding.
+ *
+ * Streaming sweeps use a cached uninitialized clone of the pdf field as the
+ * write target and swap data pointers afterwards (except the *Inner variants,
+ * where the matching *Outer variant performs the swap, and the
+ * streamOnlyNoAdvancement variants, which never swap by design).
+ ***********************************************************************************/
+class D3Q27SRT
+{
+public:
+  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+
+   /// \param blocks      block storage the sweeps operate on
+   /// \param pdfsID_     field holding the particle distribution functions (27 entries)
+   /// \param densityID_  scalar output field for the macroscopic density
+   /// \param velocityID_ vector output field for the macroscopic velocity
+   /// \param omega       SRT relaxation rate
+   /// \param outerWidth  thickness of the outer shell used by the INNER/OUTER split
+   D3Q27SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
+     : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
+   {
+      // The INNER/OUTER split is only valid if a non-empty interior remains
+      // after peeling off the outer shell on both sides of every axis.
+      for (auto& iBlock : *blocks)
+      {
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
+             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
+             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+      }
+   }
+
+   // The destructor frees raw owning pointers stored in cache_pdfs_, so a
+   // copied instance would double-delete them. Copying is therefore disabled
+   // (this also suppresses the implicit moves).
+   D3Q27SRT(const D3Q27SRT &) = delete;
+   D3Q27SRT & operator=(const D3Q27SRT &) = delete;
+
+   ~D3Q27SRT()
+   {
+      // Release the cached temporary pdf fields created by getTmpField().
+      for (auto * tmp : cache_pdfs_)
+         delete tmp;
+   }
+
+   /*************************************************************************************
+   *                Internal Function Definitions with raw Pointer
+   *************************************************************************************/
+   static void streamCollide (field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, double omega, const cell_idx_t ghost_layers = 0);
+   static void streamCollideCellInterval (field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, double omega, const CellInterval & ci);
+
+   static void collide (field::GhostLayerField<double, 27> * pdfs, double omega, const cell_idx_t ghost_layers = 0);
+   static void collideCellInterval (field::GhostLayerField<double, 27> * pdfs, double omega, const CellInterval & ci);
+
+   static void stream (field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const cell_idx_t ghost_layers = 0);
+   static void streamCellInterval (field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const CellInterval & ci);
+
+   static void streamOnlyNoAdvancement (field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const cell_idx_t ghost_layers = 0);
+   static void streamOnlyNoAdvancementCellInterval (field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 27> * pdfs_tmp, const CellInterval & ci);
+
+   static void initialise (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers = 0);
+   static void initialiseCellInterval (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci);
+
+   static void calculateMacroscopicParameters (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const cell_idx_t ghost_layers = 0);
+   static void calculateMacroscopicParametersCellInterval (field::GhostLayerField<double, 1> * density, field::GhostLayerField<double, 27> * pdfs, field::GhostLayerField<double, 3> * velocity, const CellInterval & ci);
+
+
+   /*************************************************************************************
+   *                Function Definitions for external Usage
+   *************************************************************************************/
+
+   /// Functor running the fused stream-collide sweep on a whole block.
+   std::function<void (IBlock *)> streamCollide()
+   {
+      return [this](IBlock* block) { streamCollide(block); };
+   }
+
+   std::function<void (IBlock *)> streamCollide(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamCollideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamCollideOuter(block); };
+         default:
+            return [this](IBlock* block) { streamCollide(block); };
+      }
+   }
+
+   /// \note ghost_layers only affects the ALL variant; INNER and OUTER work on
+   ///       fixed sub-intervals of the block and ignore it.
+   std::function<void (IBlock *)> streamCollide(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamCollideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamCollideOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
+      }
+   }
+
+   void streamCollide(IBlock * block) { streamCollide(block, cell_idx_t(0)); }
+
+   void streamCollide(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      streamCollide(pdfs, pdfs_tmp, omega_, ghost_layers);
+      pdfs->swapDataPointers(pdfs_tmp);
+   }
+
+   void streamCollideCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      streamCollideCellInterval(pdfs, pdfs_tmp, omega_, ci);
+      pdfs->swapDataPointers(pdfs_tmp);
+   }
+
+   /// Sweeps only the interior; streamCollideOuter() performs the pointer swap.
+   void streamCollideInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamCollideCellInterval(pdfs, pdfs_tmp, omega_, inner);
+   }
+
+   /// Sweeps the outer shell layer by layer, then swaps the pdf data pointers.
+   void streamCollideOuter(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      initLayers(pdfs);
+      for( auto & ci: layers_ )
+      {
+         streamCollideCellInterval(pdfs, pdfs_tmp, omega_, ci);
+      }
+
+      pdfs->swapDataPointers(pdfs_tmp);
+   }
+
+
+   /// Functor running the collision step in place (no temporary field, no swap).
+   std::function<void (IBlock *)> collide()
+   {
+      return [this](IBlock* block) { collide(block); };
+   }
+
+   std::function<void (IBlock *)> collide(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { collideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { collideOuter(block); };
+         default:
+            return [this](IBlock* block) { collide(block); };
+      }
+   }
+
+   /// \note ghost_layers only affects the ALL variant (see streamCollide).
+   std::function<void (IBlock *)> collide(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { collideInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { collideOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
+      }
+   }
+
+   void collide(IBlock * block) { collide(block, cell_idx_t(0)); }
+
+   void collide(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      collide(pdfs, omega_, ghost_layers);
+   }
+
+   void collideCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      collideCellInterval(pdfs, omega_, ci);
+   }
+
+   void collideInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      collideCellInterval(pdfs, omega_, inner);
+   }
+
+   void collideOuter(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+
+      initLayers(pdfs);
+      for( auto & ci: layers_ )
+      {
+         collideCellInterval(pdfs, omega_, ci);
+      }
+   }
+
+
+   /// Functor running the pure streaming step (writes into the temporary field,
+   /// then swaps data pointers).
+   std::function<void (IBlock *)> stream()
+   {
+      return [this](IBlock* block) { stream(block); };
+   }
+
+   std::function<void (IBlock *)> stream(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOuter(block); };
+         default:
+            return [this](IBlock* block) { stream(block); };
+      }
+   }
+
+   /// \note ghost_layers only affects the ALL variant (see streamCollide).
+   std::function<void (IBlock *)> stream(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
+      }
+   }
+
+   void stream(IBlock * block) { stream(block, cell_idx_t(0)); }
+
+   void stream(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      stream(pdfs, pdfs_tmp, ghost_layers);
+      pdfs->swapDataPointers(pdfs_tmp);
+   }
+
+   void streamCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      streamCellInterval(pdfs, pdfs_tmp, ci);
+      pdfs->swapDataPointers(pdfs_tmp);
+   }
+
+   /// Sweeps only the interior; streamOuter() performs the pointer swap.
+   void streamInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamCellInterval(pdfs, pdfs_tmp, inner);
+   }
+
+   void streamOuter(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      initLayers(pdfs);
+      for( auto & ci: layers_ )
+      {
+         streamCellInterval(pdfs, pdfs_tmp, ci);
+      }
+
+      pdfs->swapDataPointers(pdfs_tmp);
+   }
+
+
+   /// Functor streaming into the temporary field WITHOUT swapping the data
+   /// pointers afterwards - none of the variants below advance the pdf field.
+   std::function<void (IBlock *)> streamOnlyNoAdvancement()
+   {
+      return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+   }
+
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+         default:
+            return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+      }
+   }
+
+   /// \note ghost_layers only affects the ALL variant (see streamCollide).
+   std::function<void (IBlock *)> streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
+      }
+   }
+
+   void streamOnlyNoAdvancement(IBlock * block) { streamOnlyNoAdvancement(block, cell_idx_t(0)); }
+
+   void streamOnlyNoAdvancement(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      streamOnlyNoAdvancement(pdfs, pdfs_tmp, ghost_layers);
+   }
+
+   void streamOnlyNoAdvancementCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
+   }
+
+   void streamOnlyNoAdvancementInner(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      CellInterval inner = pdfs->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, inner);
+   }
+
+   void streamOnlyNoAdvancementOuter(IBlock * block)
+   {
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto pdfs_tmp = getTmpField(pdfs);
+
+      initLayers(pdfs);
+      for( auto & ci: layers_ )
+      {
+         streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
+      }
+   }
+
+
+   /// Functor initialising the pdf field from the density and velocity fields.
+   std::function<void (IBlock *)> initialise()
+   {
+      return [this](IBlock* block) { initialise(block); };
+   }
+
+   std::function<void (IBlock *)> initialise(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { initialiseInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { initialiseOuter(block); };
+         default:
+            return [this](IBlock* block) { initialise(block); };
+      }
+   }
+
+   /// \note ghost_layers only affects the ALL variant (see streamCollide).
+   std::function<void (IBlock *)> initialise(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { initialiseInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { initialiseOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
+      }
+   }
+
+   void initialise(IBlock * block) { initialise(block, cell_idx_t(0)); }
+
+   void initialise(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      initialise(density, pdfs, velocity, ghost_layers);
+   }
+
+   void initialiseCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      initialiseCellInterval(density, pdfs, velocity, ci);
+   }
+
+   void initialiseInner(IBlock * block)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      CellInterval inner = density->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      initialiseCellInterval(density, pdfs, velocity, inner);
+   }
+
+   void initialiseOuter(IBlock * block)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      initLayers(density);
+      for( auto & ci: layers_ )
+      {
+         initialiseCellInterval(density, pdfs, velocity, ci);
+      }
+   }
+
+
+   /// Functor computing density and velocity from the pdf field.
+   std::function<void (IBlock *)> calculateMacroscopicParameters()
+   {
+      return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+   }
+
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+         default:
+            return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+      }
+   }
+
+   /// \note ghost_layers only affects the ALL variant (see streamCollide).
+   std::function<void (IBlock *)> calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers)
+   {
+      switch (type)
+      {
+         case Type::INNER:
+            return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+         case Type::OUTER:
+            return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+         default:
+            return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
+      }
+   }
+
+   void calculateMacroscopicParameters(IBlock * block) { calculateMacroscopicParameters(block, cell_idx_t(0)); }
+
+   void calculateMacroscopicParameters(IBlock * block, const cell_idx_t ghost_layers)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      calculateMacroscopicParameters(density, pdfs, velocity, ghost_layers);
+   }
+
+   void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
+   }
+
+   void calculateMacroscopicParametersInner(IBlock * block)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      CellInterval inner = density->xyzSize();
+      inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+      calculateMacroscopicParametersCellInterval(density, pdfs, velocity, inner);
+   }
+
+   void calculateMacroscopicParametersOuter(IBlock * block)
+   {
+      auto velocity = block->getData< field::GhostLayerField<double, 3> >(velocityID);
+      auto pdfs = block->getData< field::GhostLayerField<double, 27> >(pdfsID);
+      auto density = block->getData< field::GhostLayerField<double, 1> >(densityID);
+
+      initLayers(density);
+      for( auto & ci: layers_ )
+      {
+         calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
+      }
+   }
+
+   private:
+      /// Returns the cached temporary (double-buffer) field for \p pdfs,
+      /// cloning an uninitialized copy on first use. The cache owns the clones;
+      /// they are freed in the destructor.
+      field::GhostLayerField<double, 27> * getTmpField(field::GhostLayerField<double, 27> * pdfs)
+      {
+         auto it = cache_pdfs_.find( pdfs );
+         if( it != cache_pdfs_.end() )
+            return *it;
+
+         auto * pdfs_tmp = pdfs->cloneUninitialized();
+         cache_pdfs_.insert(pdfs_tmp);
+         return pdfs_tmp;
+      }
+
+      /// Lazily computes the six non-overlapping outer-shell intervals (T, B,
+      /// N, S, E, W, each shrunk to avoid double coverage). Computed once from
+      /// the first block encountered - assumes uniform block sizes, as checked
+      /// in the constructor. No-op once layers_ is filled.
+      template< typename Field_T >
+      void initLayers(Field_T * field)
+      {
+         if( !layers_.empty() )
+            return;
+
+         CellInterval ci;
+
+         field->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+         field->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+         layers_.push_back(ci);
+
+         field->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+         field->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+         ci.expand(Cell(0, 0, -outerWidth_[2]));
+         layers_.push_back(ci);
+
+         field->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+         field->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+         ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+         layers_.push_back(ci);
+      }
+
+      shared_ptr< StructuredBlockStorage > blocks_;
+      BlockDataID pdfsID;
+      BlockDataID densityID;
+      BlockDataID velocityID;
+      double omega_;
+
+      // Owning cache of temporary double-buffer fields, keyed by the source field.
+      std::set< field::GhostLayerField<double, 27> *, field::SwapableCompare< field::GhostLayerField<double, 27> * > > cache_pdfs_;
+
+      Cell outerWidth_;
+      std::vector<CellInterval> layers_;
+};
+
+
+} // namespace lbm
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py b/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdc208608f08bf202d361b5c369d48199c5c5ed4
--- /dev/null
+++ b/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py
@@ -0,0 +1,48 @@
+import sympy as sp
+
+from pystencils import Target
+from pystencils import fields
+
+from lbmpy.creationfunctions import create_lb_collision_rule
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
+from pystencils_walberla import ManualCodeGenerationContext, generate_info_header
+from lbmpy_walberla import generate_lbm_sweep_collection
+
+
+with ManualCodeGenerationContext(openmp=False, optimize_for_localhost=False,
+                                 mpi=True, double_accuracy=True, cuda=False) as ctx:
+
+    for stencil in [LBStencil(Stencil.D3Q19), LBStencil(Stencil.D3Q27)]:
+        target = Target.GPU if ctx.cuda else Target.CPU
+        data_type = "float64" if ctx.double_accuracy else "float32"
+        openmp = True if ctx.openmp else False
+        if ctx.optimize_for_localhost:
+            cpu_vec = {"nontemporal": False, "assume_aligned": True}
+        else:
+            cpu_vec = None
+
+        method = Method.SRT
+        relaxation_rate = sp.symbols("omega")
+        streaming_pattern = 'pull'
+
+        pdfs = fields(f"pdfs({stencil.Q}): {data_type}[{stencil.D}D]", layout='fzyx')
+        density_field, velocity_field = fields(f"density(1), velocity({stencil.D}): {data_type}[{stencil.D}D]",
+                                               layout='fzyx')
+
+        macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
+
+        lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate,
+                               streaming_pattern=streaming_pattern)
+        lbm_opt = LBMOptimisation(cse_global=False)
+
+        collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
+
+        generate_lbm_sweep_collection(ctx, f'{stencil.name}{method.name}', collision_rule,
+                                      streaming_pattern='pull',
+                                      field_layout='zyxf',
+                                      refinement_scaling=None,
+                                      macroscopic_fields=macroscopic_fields,
+                                      target=target, data_type=data_type,
+                                      cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+
+        ctx.write_all_files()
diff --git a/src/stencil/Directions.h b/src/stencil/Directions.h
index d3a75b812131878c4222d4cf4fa5ec5953e1f087..5be0d72223712c7cc8f4aa2b991bb9456f09aadb 100644
--- a/src/stencil/Directions.h
+++ b/src/stencil/Directions.h
@@ -9,9 +9,10 @@
 #pragma once
 
 // core includes
-#include "core/DataTypes.h"
 #include "core/cell/Cell.h"
+#include "core/DataTypes.h"
 #include "core/debug/Debug.h"
+#include "core/math/Vector3.h"
 
 // STL includes
 #include <string>
@@ -135,6 +136,39 @@ namespace stencil {
       }
    };
 
+
+   /// Maps a (x,y,z) direction vector to its Direction; each component must be in {-1, 0, 1} — no bounds check, out-of-range components index past vecToDirArr \ingroup stencil
+   inline Direction vectorToDirection(cell_idx_t x, cell_idx_t y, cell_idx_t z){
+      static const Direction vecToDirArr[3][3][3] = {
+         {  // x = -1
+            {BSW, SW, TSW},   // y = -1
+            {BW, W, TW},      // y = 0
+            {BNW, NW, TNW}    // y = 1
+         },
+         {  // x = 0
+            {BS, S, TS},      // y = -1
+            {B, C, T},        // y = 0
+            {BN, N, TN}       // y = 1
+         },
+         {  // x = 1
+            {BSE, SE, TSE},   // y = -1
+            {BE, E, TE},      // y = 0
+            {BNE, NE, TNE}    // y = 1
+         }
+      };
+
+      return vecToDirArr[x + 1][y + 1][z + 1]; // shift components from {-1,0,1} to array indices {0,1,2}
+   }
+
+   /// Convenience overload: maps a Vector3 direction vector to its Direction \ingroup stencil
+   inline Direction vectorToDirection(Vector3< cell_idx_t > vec){
+      return vectorToDirection(vec[0], vec[1], vec[2]);
+   }
+
+   /// Classification helpers; they rely on the numeric ordering of the Direction enum (faces 1..6, edges 7..18, corners from 19) \ingroup stencil
+   inline bool isFaceDirection(Direction dir) { return 1 <= dir && dir <= 6; }   // the six axis-aligned neighbours
+   inline bool isEdgeDirection(Direction dir) { return 7 <= dir && dir <= 18; }  // the twelve edge neighbours
+   inline bool isCornerDirection(Direction dir) { return 19 <= dir; }            // NOTE(review): also true for any enum value >= 19 (e.g. an INVALID marker) — confirm callers pass real directions only
+
+
    /// The x,y,z component for each normalized direction \ingroup stencil
    const real_t cNorm[3][NR_OF_DIRECTIONS] = {
       {
diff --git a/src/timeloop/SweepTimeloop.cpp b/src/timeloop/SweepTimeloop.cpp
index 6064efa27af1dce8a8b435132f961325759aa8a1..5721c51c79a57aa19b684776f8f70545a5a6d0bc 100644
--- a/src/timeloop/SweepTimeloop.cpp
+++ b/src/timeloop/SweepTimeloop.cpp
@@ -52,11 +52,11 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors)
          if( s.sweep.empty() )
          {
             WALBERLA_ABORT("Selecting Sweep " << sweepIt->first << ": " <<
-                           "No sweep has been registered! Did you only register a BeforeFunction or AfterFunction?" );
+                           "No sweep has been registered! Did you only register a BeforeFunction or AfterFunction?" )
          }
 
          // ensure that exactly one sweep has been registered that matches the specified selectors
-         size_t numSweeps = s.sweep.getNumberOfMatching(selectors + bi->getState());
+         size_t const numSweeps = s.sweep.getNumberOfMatching(selectors + bi->getState());
 
          if (numSweeps == size_t(0)) {
             continue;
@@ -73,7 +73,7 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors)
          {
             std::string sweepName;
             s.sweep.getUnique( selectors + bi->getState(), sweepName );
-            WALBERLA_LOG_PROGRESS("Running sweep \"" << sweepName << "\" on block " << bi->getId() );
+            WALBERLA_LOG_PROGRESS("Running sweep \"" << sweepName << "\" on block " << bi->getId() )
          }
 
          (selectedSweep->function_)( bi.get() );
@@ -121,11 +121,11 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors, WcTimingPool &timing)
          if( s.sweep.empty() )
          {
             WALBERLA_ABORT("Selecting Sweep " << sweepIt->first << ": " <<
-                           "No sweep has been registered! Did you only register a BeforeFunction or AfterFunction?" );
+                           "No sweep has been registered! Did you only register a BeforeFunction or AfterFunction?" )
          }
 
          // ensure that exactly one sweep has been registered that matches the specified selectors
-         size_t numSweeps = s.sweep.getNumberOfMatching(selectors + bi->getState());
+         size_t const numSweeps = s.sweep.getNumberOfMatching(selectors + bi->getState());
 
          if (numSweeps == size_t(0)) {
             continue;
@@ -139,7 +139,7 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors, WcTimingPool &timing)
          std::string sweepName;
          Sweep * selectedSweep = s.sweep.getUnique( selectors + bi->getState(), sweepName );
 
-         WALBERLA_LOG_PROGRESS("Running sweep \"" << sweepName << "\" on block " << bi->getId() );
+         WALBERLA_LOG_PROGRESS("Running sweep \"" << sweepName << "\" on block " << bi->getId() )
 
          // loop over all blocks
          timing[sweepName].start();
diff --git a/src/timeloop/Timeloop.cpp b/src/timeloop/Timeloop.cpp
index 6b2f548d54ec9922200488243eec2355e3a9f676..fd46e16c1a6e6cb646761e56f45231244c2083e9 100644
--- a/src/timeloop/Timeloop.cpp
+++ b/src/timeloop/Timeloop.cpp
@@ -40,7 +40,7 @@ Timeloop::Timeloop( uint_t nrOfTimeSteps)
 
 void Timeloop::run( const bool logTimeStep )
 {
-   WALBERLA_LOG_PROGRESS( "Running timeloop for " << nrOfTimeSteps_ << " time steps" );
+   WALBERLA_LOG_PROGRESS( "Running timeloop for " << nrOfTimeSteps_ << " time steps" )
    while(curTimeStep_ < nrOfTimeSteps_) {
       singleStep( logTimeStep );
       if ( stop_ ) {
@@ -48,12 +48,12 @@ void Timeloop::run( const bool logTimeStep )
          break;
       }
    }
-   WALBERLA_LOG_PROGRESS( "Timeloop finished" );
+   WALBERLA_LOG_PROGRESS( "Timeloop finished" )
 }
 
 void Timeloop::run( WcTimingPool & tp, const bool logTimeStep )
 {
-   WALBERLA_LOG_PROGRESS( "Running timeloop for " << nrOfTimeSteps_ << " time steps" );
+   WALBERLA_LOG_PROGRESS( "Running timeloop for " << nrOfTimeSteps_ << " time steps" )
 
    while(curTimeStep_ < nrOfTimeSteps_) {
       singleStep( tp, logTimeStep );
@@ -63,7 +63,7 @@ void Timeloop::run( WcTimingPool & tp, const bool logTimeStep )
       }
    }
 
-   WALBERLA_LOG_PROGRESS( "Timeloop finished" );
+   WALBERLA_LOG_PROGRESS( "Timeloop finished" )
 }
 
 //*******************************************************************************************************************
@@ -97,9 +97,9 @@ void Timeloop::synchronizedStop( bool stopVal )
 
 void Timeloop::singleStep( const bool logTimeStep )
 {
-   LoggingStampManager raii( make_shared<LoggingStamp>( *this ), logTimeStep );
+   LoggingStampManager const raii( make_shared<LoggingStamp>( *this ), logTimeStep );
 
-   WALBERLA_LOG_PROGRESS( "Running time step " << curTimeStep_ );
+   WALBERLA_LOG_PROGRESS( "Running time step " << curTimeStep_ )
 
    for(size_t i=0; i<beforeFunctions_.size(); ++i )
       executeSelectable( beforeFunctions_[i], uid::globalState(), "Pre-Timestep Function" );
@@ -114,9 +114,9 @@ void Timeloop::singleStep( const bool logTimeStep )
 
 void Timeloop::singleStep( WcTimingPool & tp, const bool logTimeStep )
 {
-   LoggingStampManager raii( make_shared<LoggingStamp>( *this ), logTimeStep );
+   LoggingStampManager const raii( make_shared<LoggingStamp>( *this ), logTimeStep );
 
-   WALBERLA_LOG_PROGRESS( "Running time step " << curTimeStep_ );
+   WALBERLA_LOG_PROGRESS( "Running time step " << curTimeStep_ )
 
    for(size_t i=0; i<beforeFunctions_.size(); ++i )
       executeSelectable( beforeFunctions_[i], uid::globalState(), "Pre-Timestep Function", tp );
@@ -147,7 +147,7 @@ void Timeloop::addFuncBeforeTimeStep(const Timeloop::FctHandle & h,
                                      const VoidFctNoArguments& f, const std::string & id,
                                      const Set<SUID>&r, const Set<SUID> & e )
 {
-   WALBERLA_ASSERT_LESS( h, beforeFunctions_.size() ); //invalid FctHandle
+   WALBERLA_ASSERT_LESS( h, beforeFunctions_.size() ) //invalid FctHandle
    beforeFunctions_[h].add(f,r,e,id);
 }
 
@@ -166,7 +166,7 @@ void Timeloop::addFuncAfterTimeStep(const Timeloop::FctHandle & h,
                                            const VoidFctNoArguments& f, const std::string & id,
                                            const Set<SUID>&r, const Set<SUID> & e )
 {
-   WALBERLA_ASSERT_LESS( h, afterFunctions_.size() ); //invalid FctHandle
+   WALBERLA_ASSERT_LESS( h, afterFunctions_.size() ) //invalid FctHandle
    afterFunctions_[h].add(f,r,e,id);
 }
 
@@ -182,10 +182,10 @@ void Timeloop::executeSelectable( const selectable::SetSelectableObject<VoidFctN
    if( exe == nullptr )
       WALBERLA_ABORT( "Trying to selecting " << what << ": "
                       << "Multiple Matches found! Check your selector " << selector << std::endl
-                      << "All registered objects: " << std::endl << selectable << std::endl );
+                      << "All registered objects: " << std::endl << selectable << std::endl )
 
 
-   WALBERLA_LOG_PROGRESS("Running " << what << " \"" << objectName << "\"" );
+   WALBERLA_LOG_PROGRESS("Running " << what << " \"" << objectName << "\"" )
 
    LIKWID_MARKER_START( objectName.c_str() );
    (*exe)();
@@ -203,9 +203,9 @@ void Timeloop::executeSelectable( const selectable::SetSelectableObject<VoidFctN
    if( !exe)
       WALBERLA_ABORT( "Trying to select " << what << ": "
                       << "Multiple or no matches found! Check your selector " << selector << std::endl
-                      << "All registered objects: " << std::endl << selectable << std::endl );
+                      << "All registered objects: " << std::endl << selectable << std::endl )
 
-   WALBERLA_LOG_PROGRESS("Running " << what << " \"" << objectName << "\"" );
+   WALBERLA_LOG_PROGRESS("Running " << what << " \"" << objectName << "\"" )
 
    timing[objectName].start();
    LIKWID_MARKER_START( objectName.c_str() );
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 94efcd3ae7e6607d6528c0bf2bd50a884cda4810..b16438de039b01b03a062b79d3589642491416b5 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ add_subdirectory( gather )
 add_subdirectory( geometry )
 add_subdirectory( gui )
 add_subdirectory( lbm )
+add_subdirectory( lbm_generated )
 add_subdirectory( lbm_mesapd_coupling )
 add_subdirectory( mesa_pd )
 add_subdirectory( mesh )
diff --git a/tests/core/FunctionTraitsTest.cpp b/tests/core/FunctionTraitsTest.cpp
index 8c378eceaa7f16bf08a8400f31a35ffa028f6a9d..dc503f2db3df10f911fe2df14d7accbe0a800524 100644
--- a/tests/core/FunctionTraitsTest.cpp
+++ b/tests/core/FunctionTraitsTest.cpp
@@ -25,7 +25,7 @@
 
 using namespace walberla;
 
-// FunctionTraits are used in a similar way in cuda/Kernel.h. As explained below, special attention is required.
+// FunctionTraits are used in a similar way in gpu/Kernel.h. As explained below, special attention is required.
 template< typename F>
 struct SomeClass
 {
diff --git a/tests/field/CMakeLists.txt b/tests/field/CMakeLists.txt
index 7251f35e4886df752f61e94f12b5a38e7c327bf8..b48f4ac79d1a778ccd1cadad910b6fab00a99b36 100644
--- a/tests/field/CMakeLists.txt
+++ b/tests/field/CMakeLists.txt
@@ -71,6 +71,11 @@ waLBerla_generate_target_from_python(NAME CodegenJacobiCPUGeneratedJacobiKernel
 waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp DEPENDS gui timeloop CodegenJacobiCPUGeneratedJacobiKernel)
 waLBerla_execute_test( NAME CodegenJacobiCPU )
 
+waLBerla_generate_target_from_python(NAME SweepCollectionKernel FILE codegen/SweepCollection.py
+        OUT_FILES SweepCollection.h SweepCollection.cpp)
+waLBerla_compile_test( FILES codegen/SweepCollection.cpp DEPENDS timeloop SweepCollectionKernel)
+waLBerla_execute_test( NAME SweepCollection )
+
 waLBerla_generate_target_from_python(NAME CodegenPoissonCPUGeneratedKernel FILE codegen/Poisson.py
       OUT_FILES Poisson.cpp Poisson.h )
 waLBerla_compile_test( FILES codegen/CodegenPoissonCPU.cpp DEPENDS gui timeloop CodegenPoissonCPUGeneratedKernel)
diff --git a/tests/field/codegen/CodegenJacobiCPU.cpp b/tests/field/codegen/CodegenJacobiCPU.cpp
index 3bba9623ed02f18521431ac492f3b2c4d2a584d3..6755c687a9ff0496e02c0b776dcc90595b151090 100644
--- a/tests/field/codegen/CodegenJacobiCPU.cpp
+++ b/tests/field/codegen/CodegenJacobiCPU.cpp
@@ -84,7 +84,7 @@ void testJacobi2D()
    auto firstBlock = blocks->begin();
    auto f = firstBlock->getData<ScalarField>( fieldID );
 
-   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_c(1.0 / 4.0));
+   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_c(1.0 / 4.0))
 }
 
 
@@ -132,7 +132,7 @@ void testJacobi3D()
 
    auto firstBlock = blocks->begin();
    auto f = firstBlock->getData<ScalarField>( fieldID );
-   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_c(1.0 / 8.0));
+   WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_c(1.0 / 8.0))
 }
 
 
diff --git a/tests/field/codegen/SweepCollection.cpp b/tests/field/codegen/SweepCollection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..33c8d2be099b9146d6738a5d7809023dbb7fa3b4
--- /dev/null
+++ b/tests/field/codegen/SweepCollection.cpp
@@ -0,0 +1,89 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file SweepCollection.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "blockforest/Initialization.h"
+
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+
+#include "field/AddToStorage.h"
+#include "field/communication/PackInfo.h"
+
+#include "timeloop/SweepTimeloop.h"
+#include "SweepCollection.h"
+
+using namespace walberla;
+
+using ScalarField = GhostLayerField<real_t, 1>;
+using SweepCollection_T = pystencils::SweepCollection;
+
+void testSweepCollection()
+{
+   const uint_t xSize = 20;
+   const uint_t ySize = 20;
+   const uint_t zSize = 20;
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+           xSize, ySize, zSize,            // how many cells per block (x,y,z)
+           real_c(1.0),                          // dx: length of one cell in physical coordinates
+           false,                              // one block per process - "false" means all blocks to one process
+           true, true, true );                 // full periodicity
+
+
+   const real_t initField1 = real_c(1.0);
+   const real_t initField2 = real_c(0.0);
+   const real_t initField3 = real_c(0.0);
+   const real_t a = real_c(2.0); // scaling factor passed to the generated sweep collection
+
+   const BlockDataID field1ID = field::addToStorage<ScalarField>(blocks, "Field1", initField1);
+   const BlockDataID field2ID = field::addToStorage<ScalarField>(blocks, "Field2", initField2);
+   const BlockDataID field3ID = field::addToStorage<ScalarField>(blocks, "Field3", initField3);
+
+   SweepCollection_T sweepCollection(blocks, field1ID, field2ID, field3ID, a);
+
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(100); // values are stationary after two steps; extra steps exercise repeated sweep execution
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+
+   // Registering the sweep
+   timeloop.add() << Sweep( sweepCollection.fct1(SweepCollection_T::ALL), "fc1" ); // fct1: field2 = 2*a*field1
+   timeloop.add() << Sweep( sweepCollection.fct2(SweepCollection_T::ALL), "fc2" ); // fct2: field3 = 2*a*field2
+
+   timeloop.run();
+
+   auto firstBlock = blocks->begin();
+   auto field1 = firstBlock->getData<ScalarField>( field1ID );
+   auto field2 = firstBlock->getData<ScalarField>( field2ID );
+   auto field3 = firstBlock->getData<ScalarField>( field3ID );
+
+   WALBERLA_CHECK_FLOAT_EQUAL(field1->get(0,0,0), initField1)                                     // field1 is only read by the sweeps
+   WALBERLA_CHECK_FLOAT_EQUAL(field2->get(0,0,0), initField1 * real_c(2.0) * a)                   // field2 = 2*a*field1
+   WALBERLA_CHECK_FLOAT_EQUAL(field3->get(0,0,0), initField1 * real_c(2.0) * a * real_c(2.0) * a) // field3 = (2*a)^2 * field1
+}
+
+
+int main( int argc, char ** argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+
+   testSweepCollection();
+   return EXIT_SUCCESS;
+}
diff --git a/tests/field/codegen/SweepCollection.py b/tests/field/codegen/SweepCollection.py
new file mode 100644
index 0000000000000000000000000000000000000000..1229a2e2ec4594e7f10596b08dd7801f3a0d465a
--- /dev/null
+++ b/tests/field/codegen/SweepCollection.py
@@ -0,0 +1,19 @@
+import sympy as sp
+
+import pystencils as ps
+from pystencils import Assignment
+from pystencils_walberla import CodeGeneration, function_generator, generate_sweep_collection
+
+
+with CodeGeneration() as ctx:
+    field_type = "float64" if ctx.double_accuracy else "float32"  # follow the build's configured floating-point accuracy
+
+    a = sp.Symbol('a')  # runtime scaling parameter of the generated sweeps
+    f1, f2, f3 = ps.fields(f"f1, f2, f3: {field_type}[3D]", layout='fzyx')
+    up1 = Assignment(f2.center, 2 * a * f1.center)  # kernel 1: f2 <- 2*a*f1
+    up2 = Assignment(f3.center, 2 * a * f2.center)  # kernel 2: f3 <- 2*a*f2
+
+    fct1 = function_generator(ctx, 'fct1', up1)
+    fct2 = function_generator(ctx, 'fct2', up2)
+
+    generate_sweep_collection(ctx, "SweepCollection", [fct1, fct2])  # emits SweepCollection.h / SweepCollection.cpp with both kernels
diff --git a/tests/gpu/codegen/GeneratedFieldPackInfoTestGPU.cpp b/tests/gpu/codegen/GeneratedFieldPackInfoTestGPU.cpp
index 4360b66e97cc65176b79bc215f3b73f099f2160d..55bf49b1b1fb158164d5b9a764fd35ae02defaf5 100644
--- a/tests/gpu/codegen/GeneratedFieldPackInfoTestGPU.cpp
+++ b/tests/gpu/codegen/GeneratedFieldPackInfoTestGPU.cpp
@@ -28,6 +28,7 @@
 #include "core/debug/TestSubsystem.h"
 #include "core/Environment.h"
 
+#include "gpu/GPUWrapper.h"
 #include "gpu/FieldCopy.h"
 #include "gpu/communication/UniformGPUScheme.h"
 
@@ -60,7 +61,7 @@ gpu::GPUField<int> * createSmallGPUField( IBlock * const , StructuredBlockStorag
 
 
 void testScalarField( std::shared_ptr<blockforest::StructuredBlockForest> & sbf, BlockDataID gpuFieldId ) {
-   gpu::communication::UniformGPUScheme< Stencil_T > us{ sbf };
+   gpu::communication::UniformGPUScheme< Stencil_T > us{ sbf, false, false };
    us.addPackInfo(std::make_shared< pystencils::ScalarFieldCommunicationGPU >(gpuFieldId));
 
    for( auto & block : *sbf ) {
@@ -97,10 +98,10 @@ void testScalarField( std::shared_ptr<blockforest::StructuredBlockForest> & sbf,
 }
 
 void testScalarFieldPullReduction( std::shared_ptr<blockforest::StructuredBlockForest> & sbf, BlockDataID gpuFieldId ) {
-   gpu::communication::UniformGPUScheme< Stencil_T > us1{ sbf };
+   gpu::communication::UniformGPUScheme< Stencil_T > us1{ sbf, false, false };
    us1.addPackInfo(std::make_shared< pystencils::ScalarFieldPullReductionGPU >(gpuFieldId));
 
-   gpu::communication::UniformGPUScheme< Stencil_T > us2{ sbf };
+   gpu::communication::UniformGPUScheme< Stencil_T > us2{ sbf, false, false };
    us2.addPackInfo(std::make_shared< pystencils::ScalarFieldCommunicationGPU >(gpuFieldId));
 
    for( auto & block : *sbf ) {
diff --git a/tests/gpu/communication/GPUBlockSelectorCommunicationTest.cpp b/tests/gpu/communication/GPUBlockSelectorCommunicationTest.cpp
index d70ecf5a35e5b0773ae99f0f7a6a520f8c01b9bd..3e79d6263b5feb28a11cc15d6206ba109cd3df5e 100644
--- a/tests/gpu/communication/GPUBlockSelectorCommunicationTest.cpp
+++ b/tests/gpu/communication/GPUBlockSelectorCommunicationTest.cpp
@@ -14,7 +14,7 @@
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
 //! \file GPUBlockSelectorCommunicationTest.cpp
-//! \ingroup cuda
+//! \ingroup gpu
 //! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
 //! \brief Short communication test for the usage of block selectors in UniformGPUScheme.
 //
@@ -23,24 +23,26 @@
 #include <blockforest/GlobalLoadBalancing.h>
 #include <blockforest/Initialization.h>
 #include <blockforest/SetupBlockForest.h>
+
 #include <core/DataTypes.h>
 #include <core/Environment.h>
 #include <core/debug/TestSubsystem.h>
 #include <core/math/Random.h>
+
 #include <domain_decomposition/BlockDataID.h>
+
 #include <field/AddToStorage.h>
 #include <field/GhostLayerField.h>
+
+#include "gpu/GPUWrapper.h"
 #include <gpu/AddGPUFieldToStorage.h>
 #include <gpu/FieldCopy.h>
 #include <gpu/GPUField.h>
 #include <gpu/communication/MemcpyPackInfo.h>
 #include <gpu/communication/UniformGPUScheme.h>
+
 #include <stencil/D3Q27.h>
 #include <stencil/Directions.h>
-#include <stencil/Iterator.h>
-#include <vector>
-
-#include "gpu/GPUWrapper.h"
 
 namespace walberla
 {
@@ -53,15 +55,13 @@ using GPUScalarField_T = gpu::GPUField< Type_T >;
 const Set< SUID > requiredBlockSelector("communication");
 const Set< SUID > incompatibleBlockSelector("no communication");
 
-void suidAssignmentFunction( blockforest::SetupBlockForest & forest ) {
-
-   for( auto & sblock : forest ) {
-      if( forest.atDomainXMinBorder( sblock ) ) {
-         sblock.addState(incompatibleBlockSelector);
-      } else {
-         sblock.addState(requiredBlockSelector);
-      }
-      sblock.setWorkload(walberla::numeric_cast<walberla::workload_t>(1));
+void suidAssignmentFunction(blockforest::SetupBlockForest& forest)
+{
+   for (auto& sblock : forest)
+   {
+      if (forest.atDomainXMinBorder(sblock)) { sblock.addState(incompatibleBlockSelector); }
+      else { sblock.addState(requiredBlockSelector); }
+      sblock.setWorkload(walberla::numeric_cast< walberla::workload_t >(1));
    }
 }
 
@@ -70,13 +70,9 @@ void initScalarField(std::shared_ptr< StructuredBlockForest >& blocks, const Blo
    for (auto& block : *blocks)
    {
       Type_T val;
-      if (blocks->atDomainXMinBorder(block)) {
-         val = Type_T(-1);
-      } else if (blocks->atDomainXMaxBorder(block)) {
-         val = Type_T(1);
-      } else {
-         val = Type_T(0);
-      }
+      if (blocks->atDomainXMinBorder(block)) { val = Type_T(-1); }
+      else if (blocks->atDomainXMaxBorder(block)) { val = Type_T(1); }
+      else { val = Type_T(0); }
 
       auto* field = block.getData< ScalarField_T >(fieldID);
       WALBERLA_ASSERT_NOT_NULLPTR(field)
@@ -90,12 +86,11 @@ void initScalarField(std::shared_ptr< StructuredBlockForest >& blocks, const Blo
    }
 }
 
-std::shared_ptr< StructuredBlockForest > createSelectorBlockGrid (
-   const uint_t numberOfXBlocks,             const uint_t numberOfYBlocks,        const uint_t numberOfZBlocks,
-   const uint_t numberOfXCellsPerBlock,      const uint_t numberOfYCellsPerBlock, const uint_t numberOfZCellsPerBlock,
-   const real_t dx,
-   const bool xPeriodic, const bool yPeriodic, const bool zPeriodic,
-   const bool keepGlobalBlockInformation )
+std::shared_ptr< StructuredBlockForest >
+   createSelectorBlockGrid(const uint_t numberOfXBlocks, const uint_t numberOfYBlocks, const uint_t numberOfZBlocks,
+                           const uint_t numberOfXCellsPerBlock, const uint_t numberOfYCellsPerBlock,
+                           const uint_t numberOfZCellsPerBlock, const real_t dx, const bool xPeriodic,
+                           const bool yPeriodic, const bool zPeriodic, const bool keepGlobalBlockInformation)
 {
    // initialize SetupBlockForest = determine domain decomposition
 
@@ -103,10 +98,12 @@ std::shared_ptr< StructuredBlockForest > createSelectorBlockGrid (
 
    sforest.addWorkloadMemorySUIDAssignmentFunction(suidAssignmentFunction);
 
-   AABB const domainAABB{ real_c(0), real_c(0), real_c(0),
-                    dx * real_c( numberOfXBlocks * numberOfXCellsPerBlock ),
-                    dx * real_c( numberOfYBlocks * numberOfYCellsPerBlock ),
-                    dx * real_c( numberOfZBlocks * numberOfZCellsPerBlock ) };
+   AABB const domainAABB{ real_c(0),
+                          real_c(0),
+                          real_c(0),
+                          dx * real_c(numberOfXBlocks * numberOfXCellsPerBlock),
+                          dx * real_c(numberOfYBlocks * numberOfYCellsPerBlock),
+                          dx * real_c(numberOfZBlocks * numberOfZCellsPerBlock) };
    sforest.init(domainAABB, numberOfXBlocks, numberOfYBlocks, numberOfZBlocks, xPeriodic, yPeriodic, zPeriodic);
 
    // calculate process distribution
@@ -115,8 +112,8 @@ std::shared_ptr< StructuredBlockForest > createSelectorBlockGrid (
 
    blockforest::GlobalLoadBalancing::MetisConfiguration< SetupBlock > const metisConfig(
       true, false,
-      std::bind(blockforest::cellWeightedCommunicationCost, std::placeholders::_1, std::placeholders::_2, numberOfXCellsPerBlock,
-                numberOfYCellsPerBlock, numberOfZCellsPerBlock));
+      std::bind(blockforest::cellWeightedCommunicationCost, std::placeholders::_1, std::placeholders::_2,
+                numberOfXCellsPerBlock, numberOfYCellsPerBlock, numberOfZCellsPerBlock));
 
    sforest.calculateProcessDistribution_Default(uint_c(MPIManager::instance()->numProcesses()), memoryLimit, "hilbert",
                                                 10, false, metisConfig);
@@ -140,15 +137,16 @@ int main(int argc, char** argv)
    debug::enterTestMode();
    walberla::Environment const walberlaEnv(argc, argv);
 
-   const Vector3<uint_t> nBlocks { 3, 1, 1 };
-   const Vector3<uint_t> cells { 2, 2, 1 };
-   Vector3<real_t> domainSize;
-   for( uint_t d = 0; d < 3; ++d ) {
+   const Vector3< uint_t > nBlocks{ 3, 1, 1 };
+   const Vector3< uint_t > cells{ 2, 2, 1 };
+   Vector3< real_t > domainSize;
+   for (uint_t d = 0; d < 3; ++d)
+   {
       domainSize[d] = real_c(cells[d] * nBlocks[d]);
    }
 
-   auto blocks = createSelectorBlockGrid(nBlocks[0], nBlocks[1], nBlocks[2],
-                                         cells[0], cells[1], cells[2], 1, false, true, true, true);
+   auto blocks = createSelectorBlockGrid(nBlocks[0], nBlocks[1], nBlocks[2], cells[0], cells[1], cells[2], 1, false,
+                                         true, true, true);
 
    BlockDataID const fieldID = field::addToStorage< ScalarField_T >(blocks, "scalar", Type_T(0), field::fzyx, uint_t(1));
    initScalarField(blocks, fieldID);
@@ -161,17 +159,20 @@ int main(int argc, char** argv)
 
    // Perform one communication step
    communication();
+   WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+
 
    // Copy to CPU
    gpu::fieldCpy< ScalarField_T, GPUScalarField_T >( blocks, fieldID, gpuFieldID );
 
    // Check for correct data in ghost layers of middle block
-   auto middleBlock = blocks->getBlock( domainSize[0] / real_c(2), domainSize[1] / real_c(2), domainSize[2] / real_c(2) );
-   auto cpuField = middleBlock->getData<ScalarField_T>(fieldID);
+   auto middleBlock = blocks->getBlock(domainSize[0] / real_c(2), domainSize[1] / real_c(2), domainSize[2] / real_c(2));
+   auto cpuField    = middleBlock->getData< ScalarField_T >(fieldID);
    WALBERLA_ASSERT_NOT_NULLPTR(cpuField)
-   
+
    // avoid unused variable warning in release mode
    (void) cpuField;
+   // WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(cpuField, WALBERLA_LOG_DEVEL_VAR(cpuField->get(x, y, z)))
 
    // check for missing communication with left neighbour (first block, incompatible selector)
    WALBERLA_ASSERT_EQUAL(cpuField->get(-1, 0, 0), 0, "Communication with left neighbor detected.")
diff --git a/tests/gpu/communication/GPUPackInfoCommunicationTest.cpp b/tests/gpu/communication/GPUPackInfoCommunicationTest.cpp
index 66a4d3c74da29b7779783132a3d8f3cce5a08287..f0e41c1081e306cad53a5d0e3f04187acbb18b95 100644
--- a/tests/gpu/communication/GPUPackInfoCommunicationTest.cpp
+++ b/tests/gpu/communication/GPUPackInfoCommunicationTest.cpp
@@ -14,7 +14,7 @@
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
 //! \file GPUFieldPackInfoTest.cpp
-//! \ingroup cuda
+//! \ingroup gpu
 //! \author João Victor Tozatti Risso <jvtrisso@inf.ufpr.br>
 //! \brief Short communication test to verify the equivalence of GPUPackInfo using a default stream and multiple
 //! streams.
@@ -37,7 +37,6 @@
 #include "stencil/Directions.h"
 #include "stencil/Iterator.h"
 
-#include <cuda_runtime.h>
 #include <vector>
 
 #include "gpu/ErrorChecking.h"
@@ -134,7 +133,7 @@ int main(int argc, char** argv)
       CommSchemeType syncCommScheme(blocks);
       syncCommScheme.addPackInfo(make_shared< GPUPackInfoType >(syncGPUFieldId));
 
-      // Setup communication scheme for asynchronous GPUPackInfo, which uses CUDA streams
+      // Setup communication scheme for asynchronous GPUPackInfo, which uses GPU streams
       CommSchemeType asyncCommScheme(blocks);
       asyncCommScheme.addPackInfo(make_shared< GPUPackInfoType >(asyncGPUFieldId));
 
diff --git a/tests/gpu/communication/GPUPackInfoTest.cpp b/tests/gpu/communication/GPUPackInfoTest.cpp
index fec15a605a230c59f96abc3e31e8160992000338..e0a9d87fd06f7d261b09942c7d69bed189e60177 100644
--- a/tests/gpu/communication/GPUPackInfoTest.cpp
+++ b/tests/gpu/communication/GPUPackInfoTest.cpp
@@ -14,7 +14,7 @@
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
 //! \file GPUFieldPackInfoTest.cpp
-//! \ingroup cuda
+//! \ingroup gpu
 //! \author Paulo Carvalho <prcjunior@inf.ufpr.br>
 //! \brief Tests if a GPUField is correctly packed into buffers
 //
diff --git a/tests/lbm/diff_packinfos.sh b/tests/lbm/diff_packinfos.sh
index bfa89c5ef63477c61fefac60b7767fe22aaf4233..074d31492dbc1dd2cc0f47bc059ab5d181117f22 100755
--- a/tests/lbm/diff_packinfos.sh
+++ b/tests/lbm/diff_packinfos.sh
@@ -2,5 +2,5 @@
 
 REGEX='^((#include)|(void)|(uint_t))'
 cd default_codegen
-diff -u -B <(grep -vP "$REGEX" FromKernelPackInfoPull.cpp)  <(grep -vP "$REGEX" AccessorBasedPackInfoEven.cpp) || exit 1
-diff -u -B <(grep -vP "$REGEX" FromKernelPackInfoPush.cpp)  <(grep -vP "$REGEX" AccessorBasedPackInfoOdd.cpp) || exit 1
+diff -u -B <(tail -n +20 FromKernelPackInfoPull.cpp | grep -vP "$REGEX")  <(tail -n +20 AccessorBasedPackInfoEven.cpp | grep -vP "$REGEX") || exit 1
+diff -u -B <(tail -n +20 FromKernelPackInfoPush.cpp | grep -vP "$REGEX")  <(tail -n +20 AccessorBasedPackInfoOdd.cpp | grep -vP "$REGEX") || exit 1
diff --git a/tests/lbm_generated/CMakeLists.txt b/tests/lbm_generated/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d7a7ef76bd6b9a605d19cd7e2bbdf4bedddbed7b
--- /dev/null
+++ b/tests/lbm_generated/CMakeLists.txt
@@ -0,0 +1,21 @@
+#############################################################################################################################
+#
+# Tests for generated lbm module
+#
+#############################################################################################################################
+waLBerla_link_files_to_builddir( "*.prm" )
+waLBerla_link_files_to_builddir( "*.py" )
+
+waLBerla_generate_target_from_python(NAME ExampleGenerated
+        FILE Example.py
+        OUT_FILES LBMStorageSpecification.h LBMStorageSpecification.cpp
+        LBMSweepCollection.h LBMSweepCollection.cpp
+        NoSlip.h NoSlip.cpp
+        UBB.h UBB.cpp
+        LBMBoundaryCollection.h
+        Example_InfoHeader.h)
+waLBerla_compile_test( FILES Example.cpp DEPENDS ExampleGenerated blockforest field lbm_generated timeloop )
+
+if( WALBERLA_DOUBLE_ACCURACY )
+waLBerla_compile_test( FILES LDC.cpp DEPENDS blockforest field lbm_generated timeloop )
+endif()
diff --git a/tests/lbm_generated/Example.cpp b/tests/lbm_generated/Example.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4dfd69b553d88d268efb0c49c857eb391f6277ea
--- /dev/null
+++ b/tests/lbm_generated/Example.cpp
@@ -0,0 +1,233 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Example.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/all.h"
+
+#include "core/all.h"
+
+#include "domain_decomposition/all.h"
+
+#include "field/all.h"
+
+#include "geometry/all.h"
+
+#include "timeloop/all.h"
+
+#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/refinement/BasicRecursiveTimeStep.h"
+
+// include the generated header file. It includes all generated classes
+#include "Example_InfoHeader.h"
+
+using namespace walberla;
+using namespace std::placeholders;
+
+using StorageSpecification_T = lbm::LBMStorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
+using PdfField_T             = lbm_generated::PdfField< StorageSpecification_T >;
+using PackInfo_T             = lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T >;
+
+using SweepCollection_T = lbm::LBMSweepCollection;
+
+using VectorField_T = GhostLayerField< real_t, StorageSpecification_T::Stencil::D >;
+using ScalarField_T = GhostLayerField< real_t, 1 >;
+
+using flag_t               = walberla::uint8_t;
+using FlagField_T          = FlagField< flag_t >;
+using BoundaryCollection_T = lbm::LBMBoundaryCollection< FlagField_T >;
+
+using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
+
+class LDCRefinement
+{
+ public:
+   LDCRefinement(const uint_t depth) : refinementDepth_(depth){};
+
+   void operator()(SetupBlockForest& forest)
+   {
+      std::vector< SetupBlock* > blocks;
+      forest.getBlocks(blocks);
+
+      for (auto b : blocks)
+      {
+         if (forest.atDomainZMaxBorder(*b))
+         {
+            if (b->getLevel() < refinementDepth_) { b->setMarker(true); }
+         }
+      }
+   }
+
+ private:
+   const uint_t refinementDepth_;
+};
+
+class LDC
+{
+ public:
+   LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB"){};
+
+   Vector3< real_t > acceleration() const { return Vector3< real_t >(0.0); }
+
+   RefinementSelectionFunctor refinementSelector() { return LDCRefinement(refinementDepth_); }
+
+   void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID)
+   {
+      for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
+      {
+         Block& b           = dynamic_cast< Block& >(*bIt);
+         uint_t level       = b.getLevel();
+         auto flagField     = b.getData< FlagField_T >(flagFieldID);
+         uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
+         uint8_t ubbFlag    = flagField->registerFlag(ubbFlagUID_);
+         for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
+         {
+            Cell localCell = cIt.cell();
+            Cell globalCell(localCell);
+            sbfs.transformBlockLocalToGlobalCell(globalCell, b);
+            if (globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level))) { flagField->addFlag(localCell, ubbFlag); }
+            else if (globalCell.z() < 0 || globalCell.x() < 0 ||
+                     globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)))
+            {
+               flagField->addFlag(localCell, noslipFlag);
+            }
+         }
+      }
+   }
+
+ private:
+   const std::string refinementProfile_;
+   const uint_t refinementDepth_;
+
+   const FlagUID noSlipFlagUID_;
+   const FlagUID ubbFlagUID_;
+};
+
+static void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, LDC& setup)
+{
+   Vector3< real_t > domainSize = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   Vector3< uint_t > rootBlocks = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   Vector3< bool > periodic     = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   auto refSelection = setup.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function< void(SetupBlockForest&) >(refSelection));
+   AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalance(true), uint_c(MPIManager::instance()->numProcesses()));
+}
+
+int main(int argc, char** argv)
+{
+   walberla::Environment walberlaEnv(argc, argv);
+   mpi::MPIManager::instance()->useWorldComm();
+
+   // read parameters
+   auto domainSetup = walberlaEnv.config()->getOneBlock("DomainSetup");
+   auto parameters  = walberlaEnv.config()->getOneBlock("Parameters");
+
+   auto omega           = parameters.getParameter< real_t >("omega", real_c(1.4));
+   auto timesteps       = parameters.getParameter< uint_t >("timesteps", uint_c(10)) + uint_c(1);
+   auto refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1));
+
+   auto remainingTimeLoggerFrequency =
+      parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0)); // in seconds
+
+   auto flowSetup = std::make_shared< LDC >(refinementDepth);
+
+   SetupBlockForest setupBfs;
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+   createSetupBlockForest(setupBfs, domainSetup, *flowSetup);
+   // setupBfs now holds the refined block structure derived from the DomainSetup config block
+
+   // Create structured block forest
+   Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+
+   WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
+   auto bfs    = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
+   auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
+   blocks->createCellBoundingBoxes();
+
+   WALBERLA_ROOT_SECTION() { vtk::writeDomainDecomposition(blocks, "domainDecomposition", "vtk_out"); }
+
+   WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+   for (uint_t level = 0; level <= refinementDepth; level++)
+   {
+      WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << setupBfs.getNumberOfBlocks(level))
+   }
+
+   StorageSpecification_T StorageSpec = StorageSpecification_T();
+   BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2));
+   BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx);
+
+   BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3));
+
+   SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega);
+   for (auto& block : *blocks)
+   {
+      sweepCollection.initialise(&block);
+   }
+
+   const FlagUID fluidFlagUID("Fluid");
+   flowSetup->setupBoundaryFlagField(*blocks, flagFieldId);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID, 2);
+   BoundaryCollection_T boundaryCollection(blocks, flagFieldId, pdfFieldId, fluidFlagUID);
+
+   WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
+   auto comm =
+      std::make_shared< blockforest::communication::NonUniformBufferedScheme< CommunicationStencil_T > >(blocks);
+   auto packInfo = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, pdfFieldId);
+   comm->addPackInfo(packInfo);
+
+   lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > timestep(
+      blocks, pdfFieldId, sweepCollection, boundaryCollection, comm, packInfo);
+
+   SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
+   uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+   if (vtkWriteFrequency > 0)
+   {
+      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "ExampleVTK", vtkWriteFrequency, 0, false, "vtk_out",
+                                                      "simulation_step", false, true, true, false, 0);
+
+      auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velFieldId, "velocity");
+      vtkOutput->addBeforeFunction([&]() {
+         for (auto& block : *blocks)
+         {
+            sweepCollection.calculateMacroscopicParameters(&block);
+         }
+      });
+
+      vtkOutput->addCellDataWriter(velWriter);
+      timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+   }
+   timeloop.addFuncAfterTimeStep(timestep);
+
+   // log remaining time
+   timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+                                 "remaining time logger");
+
+   WALBERLA_LOG_INFO_ON_ROOT("Starting Simulation with " << timesteps << " timesteps")
+
+   timeloop.run();
+
+   return EXIT_SUCCESS;
+}
diff --git a/tests/lbm_generated/Example.prm b/tests/lbm_generated/Example.prm
new file mode 100644
index 0000000000000000000000000000000000000000..1957b362e4b77a94fb3d3c68b6e9d33b0efb3e6f
--- /dev/null
+++ b/tests/lbm_generated/Example.prm
@@ -0,0 +1,30 @@
+
+Parameters 
+{
+	omega           1.95;
+	timesteps       3000;
+	refinementDepth 1;
+
+	remainingTimeLoggerFrequency 3; // in seconds
+	vtkWriteFrequency 500;
+}
+
+DomainSetup
+{
+   domainSize    <64, 64, 64>;
+   rootBlocks    <4, 4, 4>;
+
+   cellsPerBlock <  16, 16, 16 >;
+   periodic      <  0,    1, 0 >;
+}
+
+Boundaries 
+{
+
+	Border { direction W;    walldistance -1;  flag NoSlip; }
+	Border { direction E;    walldistance -1;  flag NoSlip; }
+	Border { direction S;    walldistance -1;  flag NoSlip; }
+	Border { direction N;    walldistance -1;  flag UBB; }
+	Border { direction T;    walldistance -1;  flag NoSlip; }
+	Border { direction B;    walldistance -1;  flag NoSlip; }
+}
diff --git a/tests/lbm_generated/Example.py b/tests/lbm_generated/Example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5233639be24c6574cee6440300bfe73e22e5e2ae
--- /dev/null
+++ b/tests/lbm_generated/Example.py
@@ -0,0 +1,48 @@
+import sympy as sp
+
+from pystencils import Target
+from pystencils import fields
+
+from lbmpy.advanced_streaming.utility import get_timesteps
+from lbmpy.boundaries import NoSlip, UBB
+from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
+from pystencils_walberla import CodeGeneration, generate_info_header
+from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
+
+import warnings
+
+warnings.filterwarnings("ignore")
+with CodeGeneration() as ctx:
+    target = Target.CPU  # switch to Target.GPU when the build context provides CUDA (ctx.cuda)
+    data_type = "float64" if ctx.double_accuracy else "float32"
+
+    streaming_pattern = 'esotwist'
+    timesteps = get_timesteps(streaming_pattern)
+
+    omega = sp.symbols("omega")
+
+    stencil = LBStencil(Stencil.D3Q19)
+    pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]", layout='fzyx')
+
+    macroscopic_fields = {'velocity': vel_field}
+
+    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega,
+                           streaming_pattern=streaming_pattern)
+    lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
+
+    method = create_lb_method(lbm_config=lbm_config)
+    collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
+
+    no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
+                                     boundary_object=NoSlip())
+    ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
+                                 boundary_object=UBB([0.05, 0, 0], data_type=data_type))
+
+    generate_lbm_package(ctx, name="LBM",
+                         collision_rule=collision_rule,
+                         lbm_config=lbm_config, lbm_optimisation=lbm_opt,
+                         nonuniform=True, boundaries=[no_slip, ubb],
+                         macroscopic_fields=macroscopic_fields)
+
+    generate_info_header(ctx, 'Example_InfoHeader')
diff --git a/tests/lbm_generated/LDC.cpp b/tests/lbm_generated/LDC.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6df6d45a3e1cf2915e3b077a83ee77c40668fff7
--- /dev/null
+++ b/tests/lbm_generated/LDC.cpp
@@ -0,0 +1,136 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file LDC.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/all.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/all.h"
+
+#include "domain_decomposition/all.h"
+
+#include "field/all.h"
+#include "geometry/all.h"
+#include "timeloop/all.h"
+
+#include "lbm_generated/communication/UniformGeneratedPdfPackInfo.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+
+#include "lbm_generated/storage_specification/D3Q19StorageSpecification.h"
+#include "lbm_generated/sweep_collection/D3Q19SRT.h"
+#include "lbm_generated/boundary/D3Q19BoundaryCollection.h"
+
+
+using namespace walberla;
+using namespace std::placeholders;
+
+using StorageSpecification_T = lbm::D3Q19StorageSpecification;
+using Stencil_T              = StorageSpecification_T::Stencil;
+using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
+using PdfField_T             = lbm_generated::PdfField< StorageSpecification_T >;
+
+using SweepCollection_T = lbm::D3Q19SRT;
+
+using VectorField_T = GhostLayerField< real_t, StorageSpecification_T::Stencil::D >;
+using ScalarField_T = GhostLayerField< real_t, 1 >;
+
+using flag_t      = walberla::uint8_t;
+using FlagField_T = FlagField< flag_t >;
+using BoundaryCollection_T = lbm::D3Q19BoundaryCollection< FlagField_T >;
+
+using blockforest::communication::UniformBufferedScheme;
+
+int main(int argc, char** argv)
+{
+   walberla::Environment walberlaEnv(argc, argv);
+   mpi::MPIManager::instance()->useWorldComm();
+
+   // read parameters
+   auto parameters = walberlaEnv.config()->getOneBlock("Parameters");
+
+   const real_t omega     = parameters.getParameter< real_t >("omega", real_c(1.4));
+   const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)) + uint_c(1);
+
+   const double remainingTimeLoggerFrequency =
+      parameters.getParameter< double >("remainingTimeLoggerFrequency", real_c(3.0)); // in seconds
+
+   auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config());
+
+   StorageSpecification_T const StorageSpec = StorageSpecification_T();
+   BlockDataID const pdfFieldId  = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(1), field::fzyx);
+   BlockDataID const velFieldId  = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx);
+   BlockDataID const densityFieldId  = field::addToStorage< ScalarField_T >(blocks, "density", real_c(0.0), field::fzyx);
+   BlockDataID const flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(1));
+
+   const FlagUID fluidFlagUID("Fluid");
+
+   auto boundariesConfig   = walberlaEnv.config()->getBlock("Boundaries");
+   geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
+   geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
+
+   BoundaryCollection_T boundaryCollection(blocks, flagFieldId, pdfFieldId, fluidFlagUID, real_c(1.0), real_c(0.05), real_c(0.0), real_c(0.0));
+   SweepCollection_T sweepCollection(blocks, pdfFieldId, densityFieldId, velFieldId, omega);
+
+   for (auto& block : *blocks)
+   {
+      sweepCollection.initialise(&block);
+   }
+
+   auto packInfo = std::make_shared<lbm_generated::UniformGeneratedPdfPackInfo< PdfField_T >>(pdfFieldId);
+   UniformBufferedScheme< Stencil_T > communication(blocks);
+   communication.addPackInfo(packInfo);
+
+   SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
+
+   timeLoop.add() << BeforeFunction(communication, "communication")
+                  << Sweep(boundaryCollection.getSweep(BoundaryCollection_T::ALL), "Boundary Conditions");
+   timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
+   // Optional VTK output, enabled when vtkWriteFrequency > 0
+   auto vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+   if (vtkWriteFrequency > 0)
+   {
+      auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "ExampleVTK", vtkWriteFrequency, 0, false, "vtk_out",
+                                                      "simulation_step", false, true, true, false, 0);
+
+      auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velFieldId, "velocity");
+      auto densWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldId, "density");
+      vtkOutput->addBeforeFunction([&](){
+      for (auto& block : *blocks)
+      {
+         sweepCollection.calculateMacroscopicParameters(&block);
+      }
+      });
+
+      vtkOutput->addCellDataWriter(velWriter);
+      vtkOutput->addCellDataWriter(densWriter);
+
+      timeLoop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+   }
+
+   // log remaining time
+   timeLoop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+                                 "remaining time logger");
+
+   WALBERLA_LOG_INFO_ON_ROOT("Starting Simulation with " << timesteps << " timesteps")
+
+   timeLoop.run();
+
+   return EXIT_SUCCESS;
+}
diff --git a/tests/lbm_generated/LDC.prm b/tests/lbm_generated/LDC.prm
new file mode 100644
index 0000000000000000000000000000000000000000..4ba435d1b027eee3f9a066e9a9e39aa5e1ec831f
--- /dev/null
+++ b/tests/lbm_generated/LDC.prm
@@ -0,0 +1,28 @@
+
+Parameters 
+{
+	omega           1.95;
+	timesteps       3000;
+
+	remainingTimeLoggerFrequency 3; // in seconds
+	vtkWriteFrequency 500;
+}
+
+DomainSetup
+{
+   Blocks    <4, 4, 4>;
+   cellsPerBlock <  32, 32, 32 >;
+
+   periodic      <  0,    1, 0 >;
+}
+
+Boundaries 
+{
+
+	Border { direction W;    walldistance -1;  flag NoSlip; }
+	Border { direction E;    walldistance -1;  flag NoSlip; }
+	Border { direction S;    walldistance -1;  flag NoSlip; }
+	Border { direction N;    walldistance -1;  flag UBB; }
+	Border { direction T;    walldistance -1;  flag NoSlip; }
+	Border { direction B;    walldistance -1;  flag NoSlip; }
+}