diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 411a9bfe9db080b1196e2bf1583a7c51d478f941..00b4e8d8535b78fcb59e6f54a08e34c4516c4828 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -415,7 +415,7 @@ gcc_8_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -439,7 +439,7 @@ gcc_8_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -461,7 +461,7 @@ gcc_8_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -482,7 +482,7 @@ gcc_8_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -507,7 +507,7 @@ gcc_8_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -530,7 +530,7 @@ gcc_8_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -552,7 +552,7 @@ gcc_8_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -577,7 +577,7 @@ gcc_9_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -601,7 +601,7 @@ gcc_9_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -623,7 +623,7 @@ gcc_9_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -644,7 +644,7 @@ gcc_9_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -669,7 +669,7 @@ gcc_9_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -692,7 +692,7 @@ gcc_9_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -714,7 +714,7 @@ gcc_9_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -739,7 +739,7 @@ gcc_10_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -763,7 +763,7 @@ gcc_10_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -785,7 +785,7 @@ gcc_10_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -806,7 +806,7 @@ gcc_10_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -831,7 +831,7 @@ gcc_10_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -854,7 +854,7 @@ gcc_10_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -876,7 +876,7 @@ gcc_10_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:10
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -901,7 +901,7 @@ gcc_11_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -925,7 +925,7 @@ gcc_11_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -947,7 +947,7 @@ gcc_11_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -968,7 +968,7 @@ gcc_11_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -993,7 +993,7 @@ gcc_11_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1016,7 +1016,7 @@ gcc_11_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1038,7 +1038,7 @@ gcc_11_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1343,7 +1343,7 @@ clang_11.0_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1367,7 +1367,7 @@ clang_11.0_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1389,7 +1389,7 @@ clang_11.0_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1410,7 +1410,7 @@ clang_11.0_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1435,7 +1435,7 @@ clang_11.0_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1458,7 +1458,7 @@ clang_11.0_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1480,7 +1480,7 @@ clang_11.0_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:11.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1505,7 +1505,7 @@ clang_12.0_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1529,7 +1529,7 @@ clang_12.0_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1551,7 +1551,7 @@ clang_12.0_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1572,7 +1572,7 @@ clang_12.0_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1597,7 +1597,7 @@ clang_12.0_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1620,7 +1620,7 @@ clang_12.0_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1642,7 +1642,7 @@ clang_12.0_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:12.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1667,7 +1667,7 @@ clang_13.0_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1691,7 +1691,7 @@ clang_13.0_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1713,7 +1713,7 @@ clang_13.0_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1731,7 +1731,7 @@ clang_13.0_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1753,7 +1753,7 @@ clang_13.0_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1773,7 +1773,7 @@ clang_13.0_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1793,7 +1793,7 @@ clang_13.0_hybrid_dbg_sp:
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
    stage: pretest
    before_script:
-      - pip3 install lbmpy==1.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.2 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -2072,6 +2072,9 @@ coverage:
 
 mac_Serial_Dbg:
    extends: .mac_build_template
+   before_script:
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -2082,6 +2085,9 @@ mac_Serial_Dbg:
 
 mac_Serial:
    extends: .mac_build_template
+   before_script:
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "Release"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -2092,6 +2098,9 @@ mac_Serial:
 
 mac_MpiOnly_Dbg:
    extends: .mac_build_template
+   before_script:
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -2103,6 +2112,9 @@ mac_MpiOnly_Dbg:
 
 mac_MpiOnly:
    extends: .mac_build_template
+   before_script:
+      - pip3 install pystencils==1.3.2
+      - pip3 install lbmpy==1.3.2
    variables:
       CMAKE_BUILD_TYPE: "Release"
       CTEST_EXCLUDE_LABELS: "longrun"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 77369d876b90e89761e33e8d12fca0547882fafc..3ff2ea2a89bd9c49c0b294585ed7cff6b9719447 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -607,7 +607,7 @@ endif ()
 ##
 #############################################################################################################################
 if ( WALBERLA_BUILD_WITH_CODEGEN )
-   set(LBMPY_MIN_VERSION 1.2)
+   set(LBMPY_MIN_VERSION 1.3.2)
    execute_process(COMMAND ${Python_EXECUTABLE} -c "import lbmpy; print(lbmpy.__version__)"
          RESULT_VARIABLE LBMPY_FOUND OUTPUT_VARIABLE LBMPY_VERSION)
     if(NOT LBMPY_FOUND EQUAL 0)
diff --git a/extern/pybind11 b/extern/pybind11
index 8b03ffa7c06cd9c8a38297b1c8923695d1ff1b07..f7b499615e14d70ab098a20deb0cdb3889998a1a 160000
--- a/extern/pybind11
+++ b/extern/pybind11
@@ -1 +1 @@
-Subproject commit 8b03ffa7c06cd9c8a38297b1c8923695d1ff1b07
+Subproject commit f7b499615e14d70ab098a20deb0cdb3889998a1a
diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py
index 102177487690f533822ac389df1d483e3e59ae42..16e5cb35f8ed64ae77eeda06c08812bbb57e950a 100644
--- a/python/lbmpy_walberla/additional_data_handler.py
+++ b/python/lbmpy_walberla/additional_data_handler.py
@@ -1,5 +1,6 @@
 from pystencils import Target
 from pystencils.stencil import inverse_direction
+from pystencils.typing import BasicType
 
 from lbmpy.advanced_streaming import AccessPdfValues, numeric_offsets, numeric_index, Timestep, is_inplace
 from lbmpy.advanced_streaming.indexing import MirroredStencilDirections
@@ -28,22 +29,6 @@ class FreeSlipAdditionalDataHandler(AdditionalDataHandler):
         assert isinstance(boundary_object, FreeSlip)
         super(FreeSlipAdditionalDataHandler, self).__init__(stencil=stencil)
 
-    @property
-    def constructor_arguments(self):
-        return ""
-
-    @property
-    def initialiser_list(self):
-        return ""
-
-    @property
-    def additional_arguments_for_fill_function(self):
-        return ""
-
-    @property
-    def additional_parameters_for_fill_function(self):
-        return ""
-
     def data_initialisation(self, direction):
         def array_pattern(dtype, name, content):
             return f"const {str(dtype)} {name} [] = {{ {','.join(str(c) for c in content)} }};"
@@ -101,20 +86,20 @@ class FreeSlipAdditionalDataHandler(AdditionalDataHandler):
 
         return "\n".join(init_list)
 
-    @property
-    def additional_member_variable(self):
-        return ""
-
 
 class UBBAdditionalDataHandler(AdditionalDataHandler):
     def __init__(self, stencil, boundary_object):
         assert isinstance(boundary_object, UBB)
         super(UBBAdditionalDataHandler, self).__init__(stencil=stencil)
 
+    @property
+    def constructor_argument_name(self):
+        return "velocityCallback"
+
     @property
     def constructor_arguments(self):
-        return ", std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
-               "velocityCallback "
+        return f", std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
+               f"{self.constructor_argument_name} "
 
     @property
     def initialiser_list(self):
@@ -144,13 +129,20 @@ class UBBAdditionalDataHandler(AdditionalDataHandler):
 
 
 class OutflowAdditionalDataHandler(AdditionalDataHandler):
-    def __init__(self, stencil, boundary_object, target=Target.CPU, field_name='pdfs', zeroth_timestep=None):
+    def __init__(self, stencil, boundary_object, target=Target.CPU, field_name='pdfs', pdfs_data_type=None, zeroth_timestep=None):
         assert isinstance(boundary_object, ExtrapolationOutflow)
         self._stencil = boundary_object.stencil
         self._lb_method = boundary_object.lb_method
         self._normal_direction = boundary_object.normal_direction
         self._field_name = field_name
         self._target = target
+        self._dtype = BasicType(boundary_object.data_type).c_name
+        if pdfs_data_type is None:
+            self._pdfs_data_type = "real_t"
+        else:
+            pdfs_data_type = BasicType(pdfs_data_type)
+            self._pdfs_data_type = pdfs_data_type.c_name
+
         self._streaming_pattern = boundary_object.streaming_pattern
         if zeroth_timestep:
             self._zeroth_timestep = zeroth_timestep
@@ -161,6 +153,10 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
         assert sum([a != 0 for a in self._normal_direction]) == 1, \
             "The outflow boundary is only implemented for straight walls at the moment."
 
+    @property
+    def constructor_argument_name(self):
+        return f"{self._field_name}CPUID_" if self._target == Target.GPU else ""
+
     @property
     def constructor_arguments(self):
         return f", BlockDataID {self._field_name}CPUID_" if self._target == Target.GPU else ""
@@ -172,7 +168,7 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
     @property
     def additional_field_data(self):
         identifier = "CPU" if self._target == Target.GPU else ""
-        return f"auto {self._field_name} = block->getData< field::GhostLayerField<real_t, " \
+        return f"auto {self._field_name} = block->getData< field::GhostLayerField<{self._pdfs_data_type}, " \
                f"{len(self._stencil)}> >({self._field_name}{identifier}ID); "
 
     def data_initialisation(self, direction_index):
@@ -183,7 +179,7 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
 
         init_list = []
         for key, value in self.get_init_dict(pdf_acc, direction_index).items():
-            init_list.append(f"element.{key} = {self._field_name}->get({value});")
+            init_list.append(f"element.{key} = {self._dtype}( {self._field_name}->get({value}) );")
 
         return "\n".join(init_list)
 
diff --git a/python/lbmpy_walberla/boundary_collection.py b/python/lbmpy_walberla/boundary_collection.py
index 17bfa245a3212404c35dd06c420fcb19a55c3049..082567204acf47b97a1344de8b1ae38288ded567 100644
--- a/python/lbmpy_walberla/boundary_collection.py
+++ b/python/lbmpy_walberla/boundary_collection.py
@@ -15,8 +15,9 @@ from pystencils import Target
 import numpy as np
 
 
-def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBoundary, additional_data_handler=None):
-    def generation_function(ctx, lb_method, field_name='pdfs',
+def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBoundary, additional_data_handler=None,
+                           field_data_type=None):
+    def generation_function(ctx, lb_method, field_name='pdfs', spatial_shape=None,
                             streaming_pattern='pull', after_collision=True,
                             namespace='lbm',
                             **create_kernel_params):
@@ -25,6 +26,8 @@ def lbm_boundary_generator(class_name: str, flag_uid: str, boundary_object: LbBo
                                                       boundary_object=boundary_object,
                                                       lb_method=lb_method,
                                                       field_name=field_name,
+                                                      spatial_shape=spatial_shape,
+                                                      field_data_type=field_data_type,
                                                       streaming_pattern=streaming_pattern,
                                                       after_collision=after_collision,
                                                       additional_data_handler=additional_data_handler,
@@ -41,6 +44,7 @@ def generate_boundary_collection(generation_context,
                                  boundary_generators,
                                  lb_method,
                                  field_name='pdfs',
+                                 spatial_shape=None,
                                  streaming_pattern='pull',
                                  prev_timestep=Timestep.BOTH,
                                  namespace='lbm',
@@ -49,22 +53,26 @@ def generate_boundary_collection(generation_context,
     kernel_list = []
     includes = []
     boundary_classes = []
+    additional_data_handlers = []
     flag_uids = []
     object_names = []
     targets = []
 
     for boundary_generator in boundary_generators:
         boundary_functor = boundary_generator['generator']
-        context = boundary_functor(generation_context, lb_method, field_name, streaming_pattern, prev_timestep,
-                                   namespace, **create_kernel_params)
+        context = boundary_functor(generation_context, lb_method, field_name, spatial_shape,
+                                   streaming_pattern, prev_timestep, namespace, **create_kernel_params)
 
         kernel_list.append(context['kernel'])
         includes.append(f"\"{context['class_name']}.h\"")
         boundary_classes.append(f"{context['namespace']}::{context['class_name']}")
+        additional_data_handlers.append(context['additional_data_handler'])
         flag_uids.append(boundary_generator['flag_id'])
         object_names.append(f"{context['class_name']}Object")
         targets.append(f"{context['target']}")
 
+    additional_constructor_arguments = [a.constructor_arguments[2:] for a in additional_data_handlers]
+
     assert len(set(targets)) == 1
     target = targets[0]
 
@@ -75,6 +83,8 @@ def generate_boundary_collection(generation_context,
         'namespace': namespace,
         'includes': includes,
         'boundary_classes': boundary_classes,
+        'additional_data_handlers': additional_data_handlers,
+        'additional_constructor_arguments': additional_constructor_arguments,
         'flag_uids': flag_uids,
         'object_names': object_names
     }
@@ -94,6 +104,8 @@ def __generate_alternating_lbm_boundary(generation_context,
                                         boundary_object,
                                         lb_method,
                                         field_name='pdfs',
+                                        spatial_shape=None,
+                                        field_data_type=None,
                                         streaming_pattern='pull',
                                         after_collision=True,
                                         additional_data_handler=None,
@@ -132,13 +144,14 @@ def __generate_alternating_lbm_boundary(generation_context,
             return OddIntegerCondition(timestep_param_name, kernel_even, kernel_odd, timestep_param_dtype)
 
     timestep_advancement = {"field_name": field_name, "function": "getTimestep"}
-
     context = pystencils_walberla.boundary.generate_boundary(generation_context,
-                                                             class_name,
-                                                             boundary_object,
+                                                             class_name=class_name,
+                                                             boundary_object=boundary_object,
                                                              field_name=field_name,
                                                              neighbor_stencil=lb_method.stencil,
                                                              index_shape=[lb_method.stencil.Q],
+                                                             spatial_shape=spatial_shape,
+                                                             field_data_type=field_data_type,
                                                              kernel_creation_function=boundary_creation_function,
                                                              namespace=namespace,
                                                              additional_data_handler=additional_data_handler,
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
index 53e5d877e14f5584c1e2eb7738fe018752998485..8a8728031cd4eb56ba4366e25ef6fd85b1f2e1b5 100644
--- a/python/lbmpy_walberla/packing_kernels.py
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -83,7 +83,8 @@ def generate_packing_kernels(generation_context: CodeGenerationContext, class_na
 
 class PackingKernelsCodegen:
 
-    def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelConfig):
+    def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelConfig,
+                 src_field=None, dst_field=None):
         self.stencil = stencil
         self.dim = stencil.D
         self.values_per_cell = stencil.Q
@@ -94,10 +95,11 @@ class PackingKernelsCodegen:
         self.config = config
         self.data_type = config.data_type['pdfs'].numpy_dtype
 
-        self.src_field, self.dst_field = fields(
-            f'pdfs_src({self.values_per_cell}), pdfs_dst({self.values_per_cell}) :{self.data_type}[{self.dim}D]')
+        self.src_field = src_field if src_field else fields(f'pdfs_src({stencil.Q}) :{self.data_type}[{stencil.D}D]')
+        self.dst_field = dst_field if dst_field else fields(f'pdfs_dst({stencil.Q}) :{self.data_type}[{stencil.D}D]')
+
         self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)]
-        self.mask_field = fields(f'mask : uint32 [{self.dim}D]')
+        self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=src_field.layout)
 
     def create_uniform_kernel_families(self, kernels_dict=None):
         kernels = dict() if kernels_dict is None else kernels_dict
diff --git a/python/lbmpy_walberla/storage_specification.py b/python/lbmpy_walberla/storage_specification.py
index 544867aac60f440b05b89d16cbbebd0fe2cd5f89..60fd96d242bdf0c9513a1ff3d7d99efdc5898ec3 100644
--- a/python/lbmpy_walberla/storage_specification.py
+++ b/python/lbmpy_walberla/storage_specification.py
@@ -4,21 +4,25 @@ from dataclasses import replace
 from jinja2 import Environment, PackageLoader, StrictUndefined
 import numpy as np
 
-from pystencils import Target
+from pystencils import fields, Target
 
-from lbmpy import LBMConfig
-from lbmpy.advanced_streaming import is_inplace
+from lbmpy import LBMConfig, LBMOptimisation
+from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
 from lbmpy.methods import AbstractLbMethod
 
 from pystencils_walberla.cmake_integration import CodeGenerationContext
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
 from pystencils_walberla.utility import config_from_context
 from lbmpy_walberla.packing_kernels import PackingKernelsCodegen
+from lbmpy_walberla.utility import create_pdf_field
 
 
 def generate_lbm_storage_specification(generation_context: CodeGenerationContext, class_name: str,
-                                       method: AbstractLbMethod, lbm_config: LBMConfig, nonuniform: bool = False,
-                                       target: Target = Target.CPU, data_type=None, cpu_openmp: bool = False,
+                                       method: AbstractLbMethod,
+                                       lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
+                                       nonuniform: bool = False,
+                                       target: Target = Target.CPU,
+                                       data_type=None, cpu_openmp: bool = False,
                                        **create_kernel_params):
     namespace = "lbm"
     stencil = method.stencil
@@ -32,10 +36,32 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
     config = replace(config, cpu_vectorize_info=None)
 
     default_dtype = config.data_type.default_factory()
-    is_float = True if issubclass(default_dtype.numpy_dtype.type, np.float32) else False
-    constant_suffix = "f" if is_float else ""
-
-    cg = PackingKernelsCodegen(stencil, streaming_pattern, class_name, config)
+    if issubclass(default_dtype.numpy_dtype.type, np.float64):
+        data_type_string = "double"
+    elif issubclass(default_dtype.numpy_dtype.type, np.float32):
+        data_type_string = "float"
+    elif issubclass(default_dtype.numpy_dtype.type, np.float16):
+        data_type_string = "half"
+    else:
+        raise ValueError(f"default datatype {default_dtype.numpy_dtype.type} is not supported. "
+                         f"Supported are only np.float64, np.float32 and np.float16")
+
+    symbolic_field = lbm_optimisation.symbolic_field
+    if not symbolic_field:
+        symbolic_field = create_pdf_field(config=config, name="pdfs_src", stencil=stencil,
+                                          field_layout=lbm_optimisation.field_layout)
+
+    if is_inplace(streaming_pattern):
+        symbolic_temporary_field = create_pdf_field(config=config, name="pdfs_dst", stencil=stencil,
+                                                    field_layout=lbm_optimisation.field_layout)
+    else:
+        symbolic_temporary_field = lbm_optimisation.symbolic_temporary_field
+        if not symbolic_temporary_field:
+            symbolic_temporary_field = create_pdf_field(config=config, name="pdfs_dst", stencil=stencil,
+                                                        field_layout=lbm_optimisation.field_layout)
+
+    cg = PackingKernelsCodegen(stencil, streaming_pattern, class_name, config,
+                               src_field=symbolic_field, dst_field=symbolic_temporary_field)
     kernels = cg.create_uniform_kernel_families()
 
     if nonuniform:
@@ -50,6 +76,16 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
     cqc = method.conserved_quantity_computation
     equilibrium = method.equilibrium_distribution
 
+    f = fields(f"f({stencil.Q}): double[{stencil.D}D]", layout='fzyx')
+    even_accessor = get_accessor(streaming_pattern, Timestep.EVEN)
+    odd_accessor = get_accessor(streaming_pattern, Timestep.ODD)
+
+    even_read = even_accessor.read(f, stencil)
+    even_write = even_accessor.write(f, stencil)
+
+    odd_read = odd_accessor.read(f, stencil)
+    odd_write = odd_accessor.write(f, stencil)
+
     jinja_context = {
         'class_name': class_name,
         'namespace': namespace,
@@ -62,12 +98,17 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
         'equilibrium_deviation_only': equilibrium.deviation_only,
         'inplace': is_inplace(streaming_pattern),
         'zero_centered': cqc.zero_centered_pdfs,
-        'weights': ",".join(str(w.evalf()) + constant_suffix for w in method.weights),
-        'inverse_weights': ",".join(str((1 / w).evalf()) + constant_suffix for w in method.weights),
+
+        'weights': ", ".join(f"{data_type_string}({str(w.evalf())})" for w in method.weights),
+        'inverse_weights': ", ".join(f"{data_type_string}({str((1 / w).evalf())})" for w in method.weights),
+        'even_read': _get_access_list(even_read, stencil.D),
+        'even_write': _get_access_list(even_write, stencil.D),
+        'odd_read': _get_access_list(odd_read, stencil.D),
+        'odd_write': _get_access_list(odd_write, stencil.D),
 
         'nonuniform': nonuniform,
         'target': target.name.lower(),
-        'dtype': "float" if is_float else "double",
+        'dtype': data_type_string,
         'is_gpu': target == Target.GPU,
         'kernels': kernels,
         'direction_sizes': cg.get_direction_sizes(),
@@ -87,3 +128,16 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
     source_extension = "cu" if target == Target.GPU and generation_context.cuda else "cpp"
     generation_context.write_file(f"{class_name}.h", header)
     generation_context.write_file(f"{class_name}.{source_extension}", source)
+
+
+def _get_access_list(access_list, dim):
+    result = []
+    for i in range(dim):
+        result.append(", ".join([str(int(field_access.offsets[i])) for field_access in access_list]))
+
+    if dim == 2:
+        result.append(", ".join(["0"] * len(access_list)))
+
+    result.append(", ".join([str(int(field_access.index[0])) for field_access in access_list]))
+
+    return result
diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py
index 8edd0779b328de768cba4a3acb5f04bdb6bb3acf..22770379c23ec586ef92b947546623457ca74fff 100644
--- a/python/lbmpy_walberla/sweep_collection.py
+++ b/python/lbmpy_walberla/sweep_collection.py
@@ -7,9 +7,10 @@ import numpy as np
 from pystencils import Target, create_kernel
 from pystencils.config import CreateKernelConfig
 from pystencils.field import Field
+from pystencils.simp import add_subexpressions_for_field_reads
 
 from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
-from lbmpy.creationfunctions import LbmCollisionRule
+from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
 from lbmpy.fieldaccess import CollideOnlyInplaceAccessor
 from lbmpy.macroscopic_value_kernels import macroscopic_values_setter, macroscopic_values_getter
 from lbmpy.updatekernels import create_lbm_kernel, create_stream_only_kernel
@@ -17,41 +18,45 @@ from lbmpy.updatekernels import create_lbm_kernel, create_stream_only_kernel
 from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily
 from pystencils_walberla.utility import config_from_context
 from pystencils_walberla import generate_sweep_collection
+from lbmpy_walberla.utility import create_pdf_field
 
 from .alternating_sweeps import EvenIntegerCondition
 from .function_generator import kernel_family_function_generator
 
 
 def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmCollisionRule,
-                                  streaming_pattern='pull',
-                                  field_layout='fzyx', refinement_scaling=None,
-                                  macroscopic_fields: Dict[str, Field] = None,
+                                  lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
+                                  refinement_scaling=None, macroscopic_fields: Dict[str, Field] = None,
                                   target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
                                   max_threads=None,
                                   **create_kernel_params):
+
     config = config_from_context(ctx, target=target, data_type=data_type,
                                  cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info, **create_kernel_params)
 
+    streaming_pattern = lbm_config.streaming_pattern
+    field_layout = lbm_optimisation.field_layout
+
     # usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial
     # coordinates should be ordered in reverse direction i.e. zyx
     lb_method = collision_rule.method
 
-    q = lb_method.stencil.Q
-    dim = lb_method.stencil.D
-
     if field_layout == 'fzyx':
         config.cpu_vectorize_info['assume_inner_stride_one'] = True
     elif field_layout == 'zyxf':
         config.cpu_vectorize_info['assume_inner_stride_one'] = False
 
-    src_field = Field.create_generic('pdfs', dim, config.data_type['pdfs'].numpy_dtype,
-                                     index_dimensions=1, layout=field_layout, index_shape=(q,))
+    src_field = lbm_optimisation.symbolic_field
+    if not src_field:
+        src_field = create_pdf_field(config=config, name="pdfs", stencil=lbm_config.stencil,
+                                     field_layout=lbm_optimisation.field_layout)
     if is_inplace(streaming_pattern):
         dst_field = src_field
     else:
-        dst_field = Field.create_generic('pdfs_tmp', dim, config.data_type['pdfs_tmp'].numpy_dtype,
-                                         index_dimensions=1, layout=field_layout,
-                                         index_shape=(q,))
+        dst_field = lbm_optimisation.symbolic_temporary_field
+        if not dst_field:
+            dst_field = create_pdf_field(config=config, name="pdfs_tmp", stencil=lbm_config.stencil,
+                                         field_layout=lbm_optimisation.field_layout)
 
     config = replace(config, ghost_layers=0)
 
@@ -98,15 +103,17 @@ class RefinementScaling:
 def lbm_kernel_family(class_name, kernel_name,
                       collision_rule, streaming_pattern, src_field, dst_field, config: CreateKernelConfig):
 
+    default_dtype = config.data_type.default_factory()
     if kernel_name == "streamCollide":
         def lbm_kernel(field_accessor, lb_stencil):
-            return create_lbm_kernel(collision_rule, src_field, dst_field, field_accessor)
+            return create_lbm_kernel(collision_rule, src_field, dst_field, field_accessor, data_type=default_dtype)
         advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
         temporary_fields = ['pdfs_tmp']
         field_swaps = [('pdfs', 'pdfs_tmp')]
     elif kernel_name == "collide":
         def lbm_kernel(field_accessor, lb_stencil):
-            return create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor())
+            return create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor(),
+                                     data_type=default_dtype)
         advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"}
         temporary_fields = ()
         field_swaps = ()
@@ -161,6 +168,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
     density = macroscopic_fields.get('density', 1.0)
     velocity = macroscopic_fields.get('velocity', [0.0] * dim)
 
+    default_dtype = config.data_type.default_factory()
+
     get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
     temporary_fields = ()
     field_swaps = ()
@@ -173,6 +182,9 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
                                                density=density, velocity=velocity, pdfs=pdfs,
                                                streaming_pattern=streaming_pattern, previous_timestep=timestep)
 
+            if default_dtype != pdfs.dtype:
+                setter = add_subexpressions_for_field_reads(setter, data_type=default_dtype)
+
             setter_ast = create_kernel(setter, config=config)
             setter_ast.function_name = 'kernel_initialise' + timestep_suffix
             nodes.append(KernelCallNode(setter_ast))
@@ -199,6 +211,8 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
     if density is None and velocity is None:
         return None
 
+    default_dtype = config.data_type.default_factory()
+
     get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
     temporary_fields = ()
     field_swaps = ()
@@ -211,6 +225,9 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
                                                density=density, velocity=velocity, pdfs=pdfs,
                                                streaming_pattern=streaming_pattern, previous_timestep=timestep)
 
+            if default_dtype != pdfs.dtype:
+                getter = add_subexpressions_for_field_reads(getter, data_type=default_dtype)
+
             getter_ast = create_kernel(getter, config=config)
             getter_ast.function_name = 'kernel_getter' + timestep_suffix
             nodes.append(KernelCallNode(getter_ast))
diff --git a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
index 5f49137846ba99d60888e7353ac4ff195ade2a84..47f313860dacc6cf69c783047c3ed53e82bcb91b 100644
--- a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
+++ b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
@@ -41,12 +41,12 @@ class {{class_name}}
    enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
 
-   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])]] | type_identifier_list -}} )
+   {{class_name}}( {{- ["const shared_ptr<StructuredBlockForest> & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} )
       : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
    {
-      {% for object_name, boundary_class, kernel in zip(object_names, boundary_classes, kernel_list) -%}
+      {% for object_name, boundary_class, kernel, additional_data_handler in zip(object_names, boundary_classes, kernel_list, additional_data_handlers) -%}
 
-      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'pdfs', 'timestep', 'gpuStream'])]] | type_identifier_list -}});
+      {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'pdfs', 'timestep', 'gpuStream'])], additional_data_handler.constructor_argument_name] | type_identifier_list -}});
       {% endfor %}
 
       {% for object_name, flag_uid in zip(object_names, flag_uids) -%}
@@ -105,4 +105,3 @@ class {{class_name}}
 
 }
 }
-
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
index 91c7d7d960a78552628d3d8568dd611f13c14a2d..92106cddf0248c93707e6881b600b6a702d27985 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
@@ -27,31 +27,31 @@
 #   pragma GCC diagnostic ignored "-Wunused-variable"
 #endif
 
-/*************************************************************************************
+namespace walberla {
+namespace {{namespace}} {
+
+   /*************************************************************************************
  *                                Kernel Definitions
 *************************************************************************************/
-{{ kernels['packAll']      | generate_definitions }}
-{{ kernels['unpackAll']    | generate_definitions }}
-{{ kernels['localCopyAll'] | generate_definitions }}
-
-{{ kernels['packDirection']      | generate_definitions }}
-{{ kernels['unpackDirection']    | generate_definitions }}
-{{ kernels['localCopyDirection'] | generate_definitions }}
-
-{% if nonuniform -%}
-{{ kernels['unpackRedistribute']    | generate_definitions }}
-{{ kernels['packPartialCoalescence']    | generate_definitions }}
-{{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
-{{ kernels['unpackCoalescence']    | generate_definitions }}
-{%- endif %}
-
-/*************************************************************************************
+   {{ kernels['packAll']      | generate_definitions }}
+   {{ kernels['unpackAll']    | generate_definitions }}
+   {{ kernels['localCopyAll'] | generate_definitions }}
+
+   {{ kernels['packDirection']      | generate_definitions }}
+   {{ kernels['unpackDirection']    | generate_definitions }}
+   {{ kernels['localCopyDirection'] | generate_definitions }}
+
+   {% if nonuniform -%}
+   {{ kernels['unpackRedistribute']    | generate_definitions }}
+   {{ kernels['packPartialCoalescence']    | generate_definitions }}
+   {{ kernels['zeroCoalescenceRegion']    | generate_definitions }}
+   {{ kernels['unpackCoalescence']    | generate_definitions }}
+   {%- endif %}
+
+   /*************************************************************************************
  *                                 Kernel Wrappers
 *************************************************************************************/
 
-namespace walberla {
-namespace {{namespace}} {
-
    void {{class_name}}::PackKernels::packAll(
       {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
              "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
@@ -128,8 +128,8 @@ namespace {{namespace}} {
       WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
 
       {{kernels['localCopyDirection']
-          | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
-          | indent(6) }}
+               | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+               | indent(6) }}
    }
 
    {% if nonuniform -%}
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
index 5d5409b6684703666d5066ffa9ea40530a48f07d..ad40a55eef10c08deb66e6266753a828be9f3452 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
@@ -78,6 +78,32 @@ class {{class_name}}
    // Inverse lattice weights
    static constexpr {{dtype}} wInv[{{stencil_size}}] = { {{inverse_weights}} };
 
+   struct AccessorEVEN
+   {
+      static constexpr cell_idx_t readX[{{stencil_size}}] = { {{even_read[0]}} };
+      static constexpr cell_idx_t readY[{{stencil_size}}] = { {{even_read[1]}} };
+      static constexpr cell_idx_t readZ[{{stencil_size}}] = { {{even_read[2]}} };
+      static constexpr cell_idx_t readD[{{stencil_size}}] = { {{even_read[3]}} };
+
+      static constexpr cell_idx_t writeX[{{stencil_size}}] = { {{even_write[0]}} };
+      static constexpr cell_idx_t writeY[{{stencil_size}}] = { {{even_write[1]}} };
+      static constexpr cell_idx_t writeZ[{{stencil_size}}] = { {{even_write[2]}} };
+      static constexpr cell_idx_t writeD[{{stencil_size}}] = { {{even_write[3]}} };
+   };
+
+   struct AccessorODD
+   {
+      static constexpr cell_idx_t readX[{{stencil_size}}] = { {{odd_read[0]}} };
+      static constexpr cell_idx_t readY[{{stencil_size}}] = { {{odd_read[1]}} };
+      static constexpr cell_idx_t readZ[{{stencil_size}}] = { {{odd_read[2]}} };
+      static constexpr cell_idx_t readD[{{stencil_size}}] = { {{odd_read[3]}} };
+
+      static constexpr cell_idx_t writeX[{{stencil_size}}] = { {{odd_write[0]}} };
+      static constexpr cell_idx_t writeY[{{stencil_size}}] = { {{odd_write[1]}} };
+      static constexpr cell_idx_t writeZ[{{stencil_size}}] = { {{odd_write[2]}} };
+      static constexpr cell_idx_t writeD[{{stencil_size}}] = { {{odd_write[3]}} };
+   };
+
    // Compute kernels to pack and unpack MPI buffers
    class PackKernels {
 
@@ -96,8 +122,8 @@ class {{class_name}}
       static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
 
       /**
-       * Packs all pdfs from the given cell interval to the send buffer.
-       * */
+      * Packs all pdfs from the given cell interval to the send buffer.
+      * */
       void packAll(
          {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
                 "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
@@ -168,7 +194,7 @@ class {{class_name}}
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci, stencil::Direction dir) const {
-         return ci.numCells() * sizes[dir] * sizeof(value_type);
+         return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type));
       }
 
       /**
@@ -178,7 +204,7 @@ class {{class_name}}
        * @return    The required size of the buffer, in bytes
        * */
       uint_t size (CellInterval & ci) const {
-         return ci.numCells() * {{stencil_size}} * sizeof(value_type);
+         return ci.numCells() * {{stencil_size}} * uint_c(sizeof(value_type));
       }
 
       {% if nonuniform -%}
@@ -251,6 +277,8 @@ class {{class_name}}
       const uint_t sizes[{{direction_sizes|length}}] { {{ direction_sizes | join(', ') }} };
    };
 
+   using value_type = PackKernels::value_type;
+
 };
 
 }} //{{namespace}}/walberla
\ No newline at end of file
diff --git a/python/lbmpy_walberla/utility.py b/python/lbmpy_walberla/utility.py
index 1289c381e7b50ac7e83d34fca887e6d659959b92..75460d5f9458104e02f21ba52d54acbb261628b7 100644
--- a/python/lbmpy_walberla/utility.py
+++ b/python/lbmpy_walberla/utility.py
@@ -1,4 +1,8 @@
+import numpy as np
+from pystencils import CreateKernelConfig, fields
+
 from lbmpy.advanced_streaming import Timestep
+from lbmpy.stencils import LBStencil
 
 
 def timestep_suffix(timestep: Timestep):
@@ -9,3 +13,9 @@ def timestep_suffix(timestep: Timestep):
     """
     return ("_" + str(timestep)) if timestep != Timestep.BOTH else ''
 
+
+def create_pdf_field(config: CreateKernelConfig, name: str, stencil: LBStencil, field_layout: str = 'fzyx'):
+    default_dtype = config.data_type.default_factory()
+    data_type = default_dtype.numpy_dtype
+    return fields(f'{name}({stencil.Q}) :{data_type}[{stencil.D}D]', layout=field_layout)
+
diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py
index e21d6c9613a1c2be87e21cbc06a2a78212f72552..80b37a4a90f717e79ea60890a802a363c45dde52 100644
--- a/python/lbmpy_walberla/walberla_lbm_package.py
+++ b/python/lbmpy_walberla/walberla_lbm_package.py
@@ -17,7 +17,9 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                          lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
                          nonuniform: bool = False, boundaries: List[Callable] = None,
                          macroscopic_fields: Dict[str, Field] = None,
-                         target: Target = Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
+                         target: Target = Target.CPU,
+                         data_type=None, pdfs_data_type=None,
+                         cpu_openmp=None, cpu_vectorize_info=None,
                          max_threads=None,
                          **kernel_parameters):
 
@@ -27,8 +29,10 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
     method = collision_rule.method
 
     storage_spec_name = f'{name}StorageSpecification'
-    generate_lbm_storage_specification(ctx, storage_spec_name, method, lbm_config,
-                                       nonuniform=nonuniform, target=target, data_type=data_type)
+    generate_lbm_storage_specification(ctx, storage_spec_name, method, lbm_config, lbm_optimisation,
+                                       nonuniform=nonuniform, target=target,
+                                       data_type=pdfs_data_type,
+                                       cpu_openmp=cpu_openmp)
 
     if nonuniform:
         omega = get_shear_relaxation_rate(method)
@@ -37,10 +41,8 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
     else:
         refinement_scaling = None
 
-    streaming_pattern = lbm_config.streaming_pattern
     generate_lbm_sweep_collection(ctx, f'{name}SweepCollection', collision_rule,
-                                  streaming_pattern=streaming_pattern,
-                                  field_layout=lbm_optimisation.field_layout,
+                                  lbm_config=lbm_config, lbm_optimisation=lbm_optimisation,
                                   refinement_scaling=refinement_scaling,
                                   macroscopic_fields=macroscopic_fields,
                                   target=target, data_type=data_type,
@@ -48,6 +50,11 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                                   max_threads=max_threads,
                                   **kernel_parameters)
 
+    spatial_shape = None
+    if lbm_optimisation.symbolic_field and lbm_optimisation.symbolic_field.has_fixed_shape:
+        spatial_shape = lbm_optimisation.symbolic_field.spatial_shape + (lbm_config.stencil.Q, )
+
     generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
-                                 lb_method=method, streaming_pattern=streaming_pattern,
+                                 lb_method=method, field_name='pdfs', spatial_shape=spatial_shape,
+                                 streaming_pattern=lbm_config.streaming_pattern,
                                  target=target, layout=lbm_optimisation.field_layout)
diff --git a/python/pystencils_walberla/additional_data_handler.py b/python/pystencils_walberla/additional_data_handler.py
index 2c4efcc65bdd60451e32e7cefa88f9baf8d6c03e..48f87dd5307a2da6c4ceb3bfcd59974ef9896691 100644
--- a/python/pystencils_walberla/additional_data_handler.py
+++ b/python/pystencils_walberla/additional_data_handler.py
@@ -16,6 +16,10 @@ class AdditionalDataHandler:
         else:
             self._walberla_stencil = stencil
 
+    @property
+    def constructor_argument_name(self):
+        return ""
+
     @property
     def constructor_arguments(self):
         return ""
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index c5a5e54c1d00d9d6e476306453eae4320b6f5aa8..7af79ed677697633c099a5dec78b2b9afb66a226 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -19,7 +19,9 @@ def generate_boundary(generation_context,
                       field_name,
                       neighbor_stencil,
                       index_shape,
+                      spatial_shape=None,
                       field_type=FieldType.GENERIC,
+                      field_data_type=None,
                       kernel_creation_function=None,
                       target=Target.CPU,
                       data_type=None,
@@ -47,14 +49,17 @@ def generate_boundary(generation_context,
     del create_kernel_params['default_number_int']
     del create_kernel_params['skip_independence_check']
 
-    field_data_type = config.data_type[field_name].numpy_dtype
+    if field_data_type is None:
+        field_data_type = config.data_type[field_name].numpy_dtype
 
     index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, dim)
 
-    field = Field.create_generic(field_name, dim,
-                                 field_data_type,
-                                 index_dimensions=len(index_shape), layout=layout, index_shape=index_shape,
-                                 field_type=field_type)
+    if spatial_shape:
+        field = Field.create_fixed_size(field_name, spatial_shape, index_dimensions=len(index_shape),
+                                        dtype=field_data_type, layout=layout, field_type=field_type)
+    else:
+        field = Field.create_generic(field_name, dim, dtype=field_data_type, index_dimensions=len(index_shape),
+                                     layout=layout, index_shape=index_shape, field_type=field_type)
 
     index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
                         shape=(TypedSymbol("indexVectorSize", create_type("int32")), 1), strides=(1, 1))
@@ -126,5 +131,3 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob
     assert dim == len(neighbor_stencil[0])
     generate_boundary(generation_context, class_name, boundary_object, 'flux', neighbor_stencil, index_shape,
                       FieldType.STAGGERED_FLUX, target=target, **kwargs)
-
-
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h
index 96a9202c19345f0e36c5e048be1ee65969f5c966..75e3cd13abdacdcd06ea7ba784552b66daa21b63 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.h
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.h
@@ -48,6 +48,10 @@
 #define RESTRICT
 #endif
 
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
 namespace walberla {
 namespace {{namespace}} {
 
diff --git a/python/pystencils_walberla/templates/SweepCollection.tmpl.h b/python/pystencils_walberla/templates/SweepCollection.tmpl.h
index 5db4ccb33457efcc2f9f9385d0f2b32db35aef5e..2a89fef5e4c6c825443db51ed1acc3a6d02ffaa8 100644
--- a/python/pystencils_walberla/templates/SweepCollection.tmpl.h
+++ b/python/pystencils_walberla/templates/SweepCollection.tmpl.h
@@ -64,20 +64,20 @@ namespace {{namespace}} {
 
 class {{class_name}}
 {
-public:
-  enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ public:
+   enum Type { ALL = 0, INNER = 1, OUTER = 2 };
 
    {{class_name}}(const shared_ptr< StructuredBlockStorage > & blocks, {{kernel_list|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
-     : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
+      : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
    {
       {{kernel_list|generate_constructor(parameter_registration=parameter_scaling) |indent(6)}}
 
+      validInnerOuterSplit_= true;
+
       for (auto& iBlock : *blocks)
       {
-         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
-             int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
-             int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
-          WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+         if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+            validInnerOuterSplit_ = false;
       }
    };
 
@@ -105,54 +105,66 @@ public:
 
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", ] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
       }
    }
 
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers"] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
       }
    }
 
    {% if target is equalto 'gpu' -%}
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
       }
    }
 
    std::function<void (IBlock *)> {{kernel['function_name']}}({{- ["Type type", "gpuStream_t gpuStream"] | type_identifier_list -}})
    {
+      if (!validInnerOuterSplit_ && type != Type::ALL)
+         WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
       switch (type)
       {
-         case Type::INNER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         case Type::OUTER:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
-         default:
-            return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
+      case Type::INNER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      case Type::OUTER:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+      default:
+         return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
       }
    }
    {%- endif %}
@@ -247,24 +259,24 @@ public:
          layers_.push_back(ci);
       }
 
-    {%if target is equalto 'gpu'%}
+      {%if target is equalto 'gpu'%}
       {
          auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
          for( auto & ci: layers_ )
          {
-          parallelSection_.run([&]( auto s ) {
-             {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
-          });
+            parallelSection_.run([&]( auto s ) {
+               {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+            });
          }
       }
-    {% else %}
+      {% else %}
       for( auto & ci: layers_ )
       {
          {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
       }
-    {% endif %}
+      {% endif %}
 
-    {{kernel['kernel']|generate_swaps|indent(9)}}
+      {{kernel['kernel']|generate_swaps|indent(9)}}
    }
    {% endfor %}
 
@@ -275,17 +287,18 @@ public:
    }
    {%endif%}
 
-   private:
-      shared_ptr< StructuredBlockStorage > blocks_;
-      {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
+ private:
+   shared_ptr< StructuredBlockStorage > blocks_;
+   {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
 
-      Cell outerWidth_;
-      std::vector<CellInterval> layers_;
+   Cell outerWidth_;
+   std::vector<CellInterval> layers_;
+   bool validInnerOuterSplit_;
 
-      {%if target is equalto 'gpu' -%}
-      gpu::ParallelStreams parallelStreams_;
-      // std::map<BlockID, gpuStream_t > streams_;
-      {%- endif %}
+   {%if target is equalto 'gpu' -%}
+   gpu::ParallelStreams parallelStreams_;
+   // std::map<BlockID, gpuStream_t > streams_;
+   {%- endif %}
 };
 
 
diff --git a/tests/lbm_generated/Example.py b/tests/lbm_generated/Example.py
index 5233639be24c6574cee6440300bfe73e22e5e2ae..c4e3eaa96d596b2745811e30739790748740f884 100644
--- a/tests/lbm_generated/Example.py
+++ b/tests/lbm_generated/Example.py
@@ -23,7 +23,8 @@ with CodeGeneration() as ctx:
     omega = sp.symbols("omega")
 
     stencil = LBStencil(Stencil.D3Q19)
-    pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]", layout='fzyx')
+    pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]",
+                             layout='fzyx')
 
     macroscopic_fields = {'velocity': vel_field}