diff --git a/.gitignore b/.gitignore
index 4c63e3b65e5c1acb0f32cbc2d7f9975427a68c57..9b148120db3b0f916bc876d5d4de33e4da0c2a21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@ qrc_*
 
 # CLion
 *.idea
+*.clion*
 
 
 # QtCreator
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 47749be91788b88f0aa025862573e8210324cc96..2f2771402b2c2a625736330446783ac51c7d6230 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -273,7 +273,7 @@ gcc_9_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -297,7 +297,7 @@ gcc_9_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -319,7 +319,7 @@ gcc_9_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -340,7 +340,7 @@ gcc_9_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -365,7 +365,7 @@ gcc_9_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -388,7 +388,7 @@ gcc_9_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -407,7 +407,7 @@ gcc_9_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-9
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -432,7 +432,7 @@ gcc_10_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -456,7 +456,7 @@ gcc_10_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -478,7 +478,7 @@ gcc_10_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -499,7 +499,7 @@ gcc_10_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -524,7 +524,7 @@ gcc_10_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -547,7 +547,7 @@ gcc_10_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -569,7 +569,7 @@ gcc_10_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-10
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -594,7 +594,7 @@ gcc_11_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -618,7 +618,7 @@ gcc_11_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -640,7 +640,7 @@ gcc_11_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -661,7 +661,7 @@ gcc_11_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -686,7 +686,7 @@ gcc_11_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -709,7 +709,7 @@ gcc_11_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -731,7 +731,7 @@ gcc_11_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc-11
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -841,7 +841,7 @@ clang_12_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -865,7 +865,7 @@ clang_12_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -887,7 +887,7 @@ clang_12_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -908,7 +908,7 @@ clang_12_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -933,7 +933,7 @@ clang_12_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -956,7 +956,7 @@ clang_12_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -975,7 +975,7 @@ clang_12_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-12
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1000,7 +1000,7 @@ clang_13_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1024,7 +1024,7 @@ clang_13_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1046,7 +1046,7 @@ clang_13_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1067,7 +1067,7 @@ clang_13_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1092,7 +1092,7 @@ clang_13_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1115,7 +1115,7 @@ clang_13_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1137,7 +1137,7 @@ clang_13_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-13
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1162,7 +1162,7 @@ clang_14_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1186,7 +1186,7 @@ clang_14_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1208,7 +1208,7 @@ clang_14_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1229,7 +1229,7 @@ clang_14_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1254,7 +1254,7 @@ clang_14_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1277,7 +1277,7 @@ clang_14_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1299,7 +1299,7 @@ clang_14_hybrid_dbg_sp:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-14
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1324,7 +1324,7 @@ clang_15_serial:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1348,7 +1348,7 @@ clang_15_mpionly:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1370,7 +1370,7 @@ clang_15_hybrid:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1388,7 +1388,7 @@ clang_15_serial_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1410,7 +1410,7 @@ clang_15_mpionly_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1430,7 +1430,7 @@ clang_15_hybrid_dbg:
    extends: .build_template
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1450,7 +1450,7 @@ clang_15_hybrid_dbg_sp:
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang-15
    stage: pretest
    before_script:
-      - pip3 install lbmpy==1.3.2 jinja2 pytest
+      - pip3 install lbmpy==1.3.3 jinja2 pytest
       - cd python
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
@@ -1654,8 +1654,8 @@ coverage:
 mac_Serial_Dbg:
    extends: .mac_build_template
    before_script:
-     - pip3 install pystencils==1.3.2
-     - pip3 install lbmpy==1.3.2
+     - pip3 install pystencils==1.3.3
+     - pip3 install lbmpy==1.3.3
    variables:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -1667,8 +1667,8 @@ mac_Serial_Dbg:
 mac_Serial:
    extends: .mac_build_template
    before_script:
-     - pip3 install pystencils==1.3.2
-     - pip3 install lbmpy==1.3.2
+     - pip3 install pystencils==1.3.3
+     - pip3 install lbmpy==1.3.3
    variables:
       CMAKE_BUILD_TYPE: "Release"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -1680,8 +1680,8 @@ mac_Serial:
 mac_MpiOnly_Dbg:
    extends: .mac_build_template
    before_script:
-     - pip3 install pystencils==1.3.2
-     - pip3 install lbmpy==1.3.2
+     - pip3 install pystencils==1.3.3
+     - pip3 install lbmpy==1.3.3
    variables:
       CMAKE_BUILD_TYPE: "DebugOptimized"
       CTEST_EXCLUDE_LABELS: "longrun"
@@ -1694,8 +1694,8 @@ mac_MpiOnly_Dbg:
 mac_MpiOnly:
    extends: .mac_build_template
    before_script:
-     - pip3 install pystencils==1.3.2
-     - pip3 install lbmpy==1.3.2
+     - pip3 install pystencils==1.3.3
+     - pip3 install lbmpy==1.3.3
    variables:
       CMAKE_BUILD_TYPE: "Release"
       CTEST_EXCLUDE_LABELS: "longrun"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3b808b8d12c0c708bef8d1fbd1c280aab9a62e0..4365195f6a3318d1fbca8764220ec4d40348c7d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -618,7 +618,7 @@ endif ()
 ##
 #############################################################################################################################
 if ( WALBERLA_BUILD_WITH_CODEGEN )
-   set(LBMPY_MIN_VERSION 1.3.2)
+   set(LBMPY_MIN_VERSION 1.3.3)
    execute_process(COMMAND ${Python_EXECUTABLE} -c "import lbmpy; print(lbmpy.__version__)"
          RESULT_VARIABLE LBMPY_FOUND OUTPUT_VARIABLE LBMPY_VERSION)
     if(NOT LBMPY_FOUND EQUAL 0)
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
index e8b28a4e5402d9122e6e4cf34c28248ca6e13b6b..d0e676f573c33880f7fe9e20f60700e51d36f28a 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
+++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
@@ -93,7 +93,7 @@ with CodeGeneration() as ctx:
     g_updates = g_updates.new_with_substitutions(parameters.parameter_map())
 
     force_h = interface_tracking_force(C, stencil_phase, parameters)
-    hydro_force = hydrodynamic_force(g, C, method_hydro, parameters, body_force)
+    hydro_force = hydrodynamic_force(C, method_hydro, parameters, body_force)
 
     ####################
     # LBM UPDATE RULES #
@@ -108,7 +108,7 @@ with CodeGeneration() as ctx:
 
     phase_field_LB_step = sympy_cse(phase_field_LB_step)
     # ---------------------------------------------------------------------------------------------------------
-    force_Assignments = hydrodynamic_force_assignments(g, u, C, method_hydro, parameters, body_force)
+    force_Assignments = hydrodynamic_force_assignments(u, C, method_hydro, parameters, body_force)
 
     lbm_optimisation = LBMOptimisation(symbolic_field=g, symbolic_temporary_field=g_tmp)
     hydro_LB_step = create_lb_update_rule(lbm_config=lbm_config_hydro,
diff --git a/apps/showcases/CMakeLists.txt b/apps/showcases/CMakeLists.txt
index 0b655239106e3c076b00e1c071e89d9cffb7d4d7..1442bac94b94111f1b290edf4ed86a9b51b46419 100644
--- a/apps/showcases/CMakeLists.txt
+++ b/apps/showcases/CMakeLists.txt
@@ -7,13 +7,21 @@ add_subdirectory( LightRisingParticleInFluidAMR )
 add_subdirectory( Mixer )
 add_subdirectory( ParticlePacking )
 add_subdirectory( PegIntoSphereBed )
-if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_PYTHON )
-   add_subdirectory( PhaseFieldAllenCahn )
-endif()
-if ( WALBERLA_BUILD_WITH_CODEGEN AND NOT WALBERLA_BUILD_WITH_OPENMP)
-   add_subdirectory( PorousMedia )
-endif()
-if ( WALBERLA_BUILD_WITH_CODEGEN )
+if ( WALBERLA_BUILD_WITH_CODEGEN)
+
    add_subdirectory( Antidunes )
+
+   if (WALBERLA_BUILD_WITH_PYTHON)
+      add_subdirectory( PhaseFieldAllenCahn )
+   endif ()
+
+   if(NOT WALBERLA_BUILD_WITH_OPENMP)
+      add_subdirectory( PorousMedia )
+   endif()
+
+   if (WALBERLA_DOUBLE_ACCURACY)
+      add_subdirectory( Thermocapillary )
+   endif ()
+
 endif()
 
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py
index 0b117fc23e52770a065c7fa782d81ffdd3991a72..60a7f4c12022f89144e60e909471a9d05cff14c3 100644
--- a/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py
@@ -91,7 +91,7 @@ with CodeGeneration() as ctx:
     g_updates = initializer_kernel_hydro_lb(method_hydro, density_field, u, g)
 
     force_h = interface_tracking_force(C, stencil_phase, parameters)
-    hydro_force = hydrodynamic_force(g, C, method_hydro, parameters, body_force)
+    hydro_force = hydrodynamic_force(C, method_hydro, parameters, body_force)
 
     ####################
     # LBM UPDATE RULES #
@@ -103,7 +103,7 @@ with CodeGeneration() as ctx:
 
     allen_cahn_update_rule = add_interface_tracking_force(allen_cahn_update_rule, force_h)
     # ---------------------------------------------------------------------------------------------------------
-    force_Assignments = hydrodynamic_force_assignments(g, u, C, method_hydro, parameters, body_force)
+    force_Assignments = hydrodynamic_force_assignments(u, C, method_hydro, parameters, body_force)
 
     lbm_optimisation = LBMOptimisation(symbolic_field=g, symbolic_temporary_field=g_tmp)
     hydro_lb_update_rule = create_lb_update_rule(lbm_config=lbm_config_hydro,
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py
index 471a3e9f4e5b1fda6eb877e00100df61ac6a8993..d83aeac4541a02d69b661bac4d672f067a533f3c 100644
--- a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py
@@ -93,7 +93,7 @@ with CodeGeneration() as ctx:
     g_updates = initializer_kernel_hydro_lb(method_hydro, 1, u, g)
 
     force_h = interface_tracking_force(C, stencil_phase, parameters)
-    hydro_force = hydrodynamic_force(g, C, method_hydro, parameters, body_force)
+    hydro_force = hydrodynamic_force(C, method_hydro, parameters, body_force)
 
     ####################
     # LBM UPDATE RULES #
@@ -111,7 +111,7 @@ with CodeGeneration() as ctx:
                                              Block(allen_cahn_update_rule.all_assignments),
                                              Block([SympyAssignment(C_tmp.center, C.center)]))])
     # ---------------------------------------------------------------------------------------------------------
-    force_Assignments = hydrodynamic_force_assignments(g, u, C, method_hydro, parameters, body_force)
+    force_Assignments = hydrodynamic_force_assignments(u, C, method_hydro, parameters, body_force)
 
     lbm_optimisation = LBMOptimisation(symbolic_field=g, symbolic_temporary_field=g_tmp)
     hydro_lb_update_rule = create_lb_update_rule(lbm_config=lbm_config_hydro,
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py b/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py
index 9bac32e1b46874049188db142d8503aeb10848a1..c9df1dfda4e208845b9a1137ea3b17c321cb817d 100755
--- a/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py
@@ -118,13 +118,13 @@ class Scenario:
                                                         diameter_outer=d1,
                                                         diameter_inner=d2)
 
-        self.density_gas = parameters["density_light"]
-        self.surface_tension = parameters["surface_tension"]
+        self.density_gas = parameters.density_light
+        self.surface_tension = parameters.surface_tension
 
-        self.gravitational_acceleration = parameters["gravitational_acceleration"]
+        self.gravitational_acceleration = parameters.gravitational_acceleration
 
-        self.relaxation_time_liquid = parameters.get("relaxation_time_heavy")
-        self.relaxation_time_gas = parameters.get("relaxation_time_light")
+        self.relaxation_time_liquid = parameters.relaxation_time_heavy
+        self.relaxation_time_gas = parameters.relaxation_time_light
 
         self.cudaEnabledMpi = cuda_enabled_mpi
         self.cuda_blocks = (64, 2, 2)
diff --git a/apps/showcases/Thermocapillary/CMakeLists.txt b/apps/showcases/Thermocapillary/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5a06ee8e3716e4e94017b9e1e50abad07f04a1df
--- /dev/null
+++ b/apps/showcases/Thermocapillary/CMakeLists.txt
@@ -0,0 +1,43 @@
+waLBerla_link_files_to_builddir(*.prm)
+waLBerla_link_files_to_builddir(*.py)
+waLBerla_link_files_to_builddir(*.obj)
+
+waLBerla_generate_target_from_python(NAME ThermocapillaryGen
+        FILE thermocapillary_codegen.py
+        OUT_FILES initialize_phase_field_distributions.${CODEGEN_FILE_SUFFIX} initialize_phase_field_distributions.h
+        initialize_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} initialize_velocity_based_distributions.h
+        initialize_thermal_distributions.${CODEGEN_FILE_SUFFIX} initialize_thermal_distributions.h
+        phase_field_LB_step.${CODEGEN_FILE_SUFFIX} phase_field_LB_step.h
+        phase_field_LB_NoSlip.${CODEGEN_FILE_SUFFIX} phase_field_LB_NoSlip.h
+        hydro_LB_step.${CODEGEN_FILE_SUFFIX} hydro_LB_step.h
+        hydro_LB_NoSlip.${CODEGEN_FILE_SUFFIX} hydro_LB_NoSlip.h
+        hydro_LB_UBB.${CODEGEN_FILE_SUFFIX} hydro_LB_UBB.h
+        thermal_LB_step.${CODEGEN_FILE_SUFFIX} thermal_LB_step.h
+        thermal_LB_DiffusionDirichlet_static.${CODEGEN_FILE_SUFFIX} thermal_LB_DiffusionDirichlet_static.h
+        thermal_LB_DiffusionDirichlet_dynamic.${CODEGEN_FILE_SUFFIX} thermal_LB_DiffusionDirichlet_dynamic.h
+        thermal_LB_NeumannByCopy.${CODEGEN_FILE_SUFFIX} thermal_LB_NeumannByCopy.h
+        initialize_rk2.${CODEGEN_FILE_SUFFIX} initialize_rk2.h
+        rk2_first_stage.${CODEGEN_FILE_SUFFIX} rk2_first_stage.h
+        rk2_second_stage.${CODEGEN_FILE_SUFFIX} rk2_second_stage.h
+        initialize_rk4.${CODEGEN_FILE_SUFFIX} initialize_rk4.h
+        rk4_first_stage.${CODEGEN_FILE_SUFFIX} rk4_first_stage.h
+        rk4_second_stage.${CODEGEN_FILE_SUFFIX} rk4_second_stage.h
+        rk4_third_stage.${CODEGEN_FILE_SUFFIX} rk4_third_stage.h
+        rk4_fourth_stage.${CODEGEN_FILE_SUFFIX} rk4_fourth_stage.h
+        PackInfo_phase_field_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field_distributions.h
+        PackInfo_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_velocity_based_distributions.h
+        PackInfo_thermal_field_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_thermal_field_distributions.h
+        PackInfo_phase_field.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field.h
+        PackInfo_temperature_field.${CODEGEN_FILE_SUFFIX} PackInfo_temperature_field.h
+        ContactAngle.${CODEGEN_FILE_SUFFIX} ContactAngle.h
+        GenDefines.h)
+
+if ( WALBERLA_BUILD_WITH_GPU_SUPPORT )
+    waLBerla_add_executable(NAME thermocapillary
+            FILES thermocapillary.cpp PythonExports.cpp InitializerFunctions.cpp thermocapillary_codegen.py
+            DEPENDS blockforest core gpu field postprocessing python_coupling lbm geometry timeloop ThermocapillaryGen)
+else ()
+    waLBerla_add_executable(NAME thermocapillary
+            FILES thermocapillary.cpp PythonExports.cpp InitializerFunctions.cpp thermocapillary_codegen.py
+            DEPENDS blockforest core field postprocessing python_coupling lbm geometry timeloop ThermocapillaryGen)
+endif ( WALBERLA_BUILD_WITH_GPU_SUPPORT )
diff --git a/apps/showcases/Thermocapillary/InitializerFunctions.cpp b/apps/showcases/Thermocapillary/InitializerFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..47b147497d73ee8cdfb327068fe0c67fc2df6c14
--- /dev/null
+++ b/apps/showcases/Thermocapillary/InitializerFunctions.cpp
@@ -0,0 +1,80 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "InitializerFunctions.h"
+
+namespace walberla
+{
+using ScalarField_T = GhostLayerField< real_t, 1 >;
+using FlagField_T  = FlagField< uint8_t >;
+
+void initPhaseFieldDroplet(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                           const real_t dropletRadius                         = real_c(10.0),
+                           const Vector3< real_t > dropletMidPoint = Vector3< real_t >(0.0, 0.0, 0.0),
+                           const real_t W)
+{
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< ScalarField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t Ri = sqrt((real_c(globalCell[0]) - dropletMidPoint[0]) * (real_c(globalCell[0]) - dropletMidPoint[0]) +
+                          (real_c(globalCell[1]) - dropletMidPoint[1]) * (real_c(globalCell[1]) - dropletMidPoint[1]) +
+                          (real_c(globalCell[2]) - dropletMidPoint[2]) * (real_c(globalCell[2]) - dropletMidPoint[2]));
+         phaseField->get(x, y, z)        = real_c(0.5) - real_c(0.5) * real_c(tanh(2.0 * (Ri - dropletRadius) / W));
+      )
+      // clang-format on
+   }
+}
+
+void initMicroChannel(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, BlockDataID temperatureFieldID,
+                           const real_t Th, const real_t T0, const real_t Tc, const real_t W)
+{
+   auto halfX          = real_c(blocks->getDomainCellBB().xMax()) / real_c(2.0);
+   auto halfY          = real_c((blocks->getDomainCellBB().yMax())) / real_c(2.0);
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< ScalarField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         phaseField->get(x, y, z)     = real_c(0.5) + real_c(0.5) * real_c(tanh(   (real_c(globalCell[1]) - halfY) / (W / real_c(2.0))   ));
+      )
+      // clang-format on
+
+      auto temperatureField = block.getData< ScalarField_T >(temperatureFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(temperatureField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         if(globalCell[1] == -1)
+         {
+            auto X = (globalCell[0] < 0) ? 0.0 : globalCell[0];
+            temperatureField->get(x, y, z)     = Th + T0 * cos( (math::pi / halfX) * (X - halfX) );
+         }
+         else
+         {
+            temperatureField->get(x, y, z)     = Tc;
+         }
+      )
+      // clang-format on
+   }
+}
+
+} // namespace walberla
diff --git a/apps/showcases/Thermocapillary/InitializerFunctions.h b/apps/showcases/Thermocapillary/InitializerFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..dd2cb5964ceb44d19ec815e1fb240e7169d11d1a
--- /dev/null
+++ b/apps/showcases/Thermocapillary/InitializerFunctions.h
@@ -0,0 +1,41 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#pragma once
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+
+namespace walberla
+{
+void initPhaseFieldDroplet(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t dropletRadius,
+                           Vector3< real_t > dropletMidPoint, const real_t W = real_c(5.0));
+
+void initMicroChannel(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, BlockDataID temperatureFieldID,
+                      const real_t Th, const real_t T0, const real_t Tc, const real_t W = real_c(5.0));
+
+
+} // namespace walberla
diff --git a/apps/showcases/Thermocapillary/PythonExports.cpp b/apps/showcases/Thermocapillary/PythonExports.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9906af03cd9af5ab6d37089b43102a4f630566ad
--- /dev/null
+++ b/apps/showcases/Thermocapillary/PythonExports.cpp
@@ -0,0 +1,55 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PythonExports.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "waLBerlaDefinitions.h"
+#ifdef WALBERLA_BUILD_WITH_PYTHON
+
+#   include "core/math/Constants.h"
+
+#   include "field/communication/PackInfo.h"
+
+#   include "python_coupling/Manager.h"
+#   include "python_coupling/export/BlockForestExport.h"
+#   include "python_coupling/export/FieldExports.h"
+
+namespace walberla {
+    using flag_t = uint8_t;
+    // clang-format off
+    void exportDataStructuresToPython() {
+
+        auto pythonManager = python_coupling::Manager::instance();
+
+        // Field
+        pythonManager->addExporterFunction(field::exportModuleToPython<Field<walberla::real_t, 1>, Field<walberla::real_t, 2>, Field<walberla::real_t, 3>, Field<walberla::real_t, 9>, Field<walberla::real_t, 19>, Field<walberla::real_t, 27>>);
+        pythonManager->addExporterFunction(field::exportGatherFunctions<Field<walberla::real_t, 1>, Field<walberla::real_t, 2>, Field<walberla::real_t, 3>, Field<walberla::real_t, 9>, Field<walberla::real_t, 19>, Field<walberla::real_t, 27>>);
+        pythonManager->addBlockDataConversion<Field<walberla::real_t, 1>, Field<walberla::real_t, 2>, Field<walberla::real_t, 3>, Field<walberla::real_t, 9>, Field<walberla::real_t, 19>, Field<walberla::real_t, 27>>();
+
+        // Blockforest
+        pythonManager->addExporterFunction(blockforest::exportModuleToPython<stencil::D2Q9, stencil::D3Q19, stencil::D3Q27>);
+    }
+   // clang-format on
+}
+
+#else
+
+namespace walberla {
+   void exportDataStructuresToPython() {}
+} // namespace walberla
+
+#endif
\ No newline at end of file
diff --git a/apps/showcases/Thermocapillary/PythonExports.h b/apps/showcases/Thermocapillary/PythonExports.h
new file mode 100644
index 0000000000000000000000000000000000000000..df5ff959a99e37be3a48f84d65e123f716126f2e
--- /dev/null
+++ b/apps/showcases/Thermocapillary/PythonExports.h
@@ -0,0 +1,26 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PythonExports.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+namespace walberla
+{
+void exportDataStructuresToPython();
+} // namespace walberla
diff --git a/apps/showcases/Thermocapillary/benchmark.py b/apps/showcases/Thermocapillary/benchmark.py
new file mode 100755
index 0000000000000000000000000000000000000000..6493eda5f56378c1d08d3814efcbbac6a7e57f09
--- /dev/null
+++ b/apps/showcases/Thermocapillary/benchmark.py
@@ -0,0 +1,213 @@
+import os
+import sys
+import pandas as pd
+
+import waLBerla as wlb
+from waLBerla.tools.config import block_decomposition
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+
+def num_time_steps(block_size, time_steps_for_256_block=50):
+    # Number of time steps run for a workload of 256^3 cells per process
+    # if double as many cells are on the process, half as many time steps are run etc.
+    # increase this to get more reliable measurements
+    cells = block_size[0] * block_size[1] * block_size[2]
+    time_steps = (256 ** 3 / cells) * time_steps_for_256_block
+    if time_steps < 10:
+        time_steps = 10
+    return int(time_steps)
+
+
+class Scenario:
+    def __init__(self, cells=(256, 256, 256), heat_solver_rk_or_lbm=False,
+                 weak_scaling=False, time_step_strategy='NormalTimestep',
+                 cuda_enabled_mpi=False, cuda_blocks=(256, 1, 1), timesteps=None, rk_solver_order=2):
+        # simulation parameters
+        self.timesteps = timesteps if timesteps else num_time_steps(cells)
+
+        self.cells = cells
+        self.blocks = block_decomposition(wlb.mpi.numProcesses())
+        self.periodic = (1, 0, 0)
+
+        self.heat_solver_rk_or_lbm = heat_solver_rk_or_lbm
+        self.weak_scaling = weak_scaling
+        self.time_step_strategy = time_step_strategy
+        self.rk_solver_order = rk_solver_order
+
+        # GPU specific parameters will be neglected for benchmarks on CPU
+        self.cuda_enabled_mpi = cuda_enabled_mpi
+        self.cuda_blocks = cuda_blocks
+
+        self.config_dict = self.config()
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+                'weakScaling': self.weak_scaling,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'pre_thermal_timesteps': 0,
+                'HeatSolverRKOrLBM': self.heat_solver_rk_or_lbm,
+                'orderRKSolver': self.rk_solver_order,
+                'vtkWriteFrequency': 0,
+                'dbWriteFrequency': 0,
+                'gpuBlockSize': self.cuda_blocks,
+                'gpuEnabledMpi': self.cuda_enabled_mpi
+            },
+            'PhysicalParameters': {
+                'density_liquid': 1.0,
+                'density_gas': 1.0,
+                'sigma_ref': 0.025,
+                'sigma_t': -5e-4,
+                'mobility': 0.05,
+                'temperature_ref': 10,
+                'heat_conductivity_liquid': 0.2,
+                'heat_conductivity_gas': 0.2,
+                'relaxation_time_liquid': 1.1,
+                'relaxation_time_gas': 1.1,
+                'interface_thickness': 4,
+                'velocity_wall': 0
+            },
+            'BoundariesAllenCahn': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesHydro': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesThermal': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'DiffusionDirichletStatic'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'DiffusionDirichletDynamic'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NeumannByCopy'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NeumannByCopy'},
+                ],
+            },
+            'Benchmark': {
+                'timeStepStrategy': self.time_step_strategy
+            }
+        }
+
+    @wlb.member_callback
+    def write_benchmark_results(self, **kwargs):
+        data = {'timesteps': self.timesteps,
+                'cells_per_block_0': self.cells[0],
+                'cells_per_block_1': self.cells[1],
+                'cells_per_block_2': self.cells[2],
+                'blocks_0': self.blocks[0],
+                'blocks_1': self.blocks[1],
+                'blocks_2': self.blocks[2],
+                'cuda_blocks_0': self.cuda_blocks[0],
+                'cuda_blocks_1': self.cuda_blocks[1],
+                'cuda_blocks_2': self.cuda_blocks[2],
+                'stencil_phase': kwargs.get('stencil_phase'),
+                'stencil_hydro': kwargs.get('stencil_hydro'),
+                'stencil_thermal': kwargs.get('stencil_thermal'),
+                'collision_space_phase': kwargs.get('collision_space_phase'),
+                'collision_space_hydro': kwargs.get('collision_space_hydro'),
+                'collision_space_thermal': kwargs.get('collision_space_thermal'),
+                'field_type': kwargs.get('field_type'),
+                'field_type_pdfs': kwargs.get('field_type_pdfs'),
+                'number_of_processes': kwargs.get('number_of_processes'),
+                'threads': kwargs.get('threads'),
+                'MLUPS': kwargs.get('MLUPS'),
+                'MLUPS_process': kwargs.get('MLUPS_process'),
+                'timeStepsPerSecond': kwargs.get('timeStepsPerSecond'),
+                'timeStepStrategy': self.time_step_strategy,
+                'thermalSolver': "Runge Kutta" if self.heat_solver_rk_or_lbm else "lattice Boltzmann",
+                'RKSolverOrder': self.rk_solver_order}
+
+        data.update(self.config_dict['PhysicalParameters'])
+        data['executable'] = sys.argv[0]
+        data['compile_flags'] = wlb.build_info.compiler_flags
+        data['walberla_version'] = wlb.build_info.version
+        data['build_machine'] = wlb.build_info.build_machine
+
+        sequenceValuesToScalars(data)
+
+        csv_file = f"thermocapillary_benchmark.csv"
+
+        df = pd.DataFrame.from_records([data])
+        if not os.path.isfile(csv_file):
+            df.to_csv(csv_file, index=False, sep=';')
+        else:
+            df.to_csv(csv_file, index=False, mode='a', header=False, sep=';')
+
+
+def profiling():
+    scenarios = wlb.ScenarioManager()
+    cuda_enabled_mpi = False
+    time_step_strategy = "PhaseOnly"  # , "HydroOnly", "ThermalOnly", "KernelOnly"
+    heat_solver_rk_or_lbm = True
+
+    cells_per_block = (256, 256, 256)
+    cuda_blocks = (128, 1, 1)
+    scenarios.add(Scenario(cells=cells_per_block,
+                           heat_solver_rk_or_lbm=heat_solver_rk_or_lbm,
+                           time_step_strategy=time_step_strategy,
+                           cuda_enabled_mpi=cuda_enabled_mpi,
+                           cuda_blocks=cuda_blocks,
+                           timesteps=2))
+
+
+def benchmark():
+    scenarios = wlb.ScenarioManager()
+    cuda_enabled_mpi = True
+    cells = (512, 256, 256)
+
+    for i in range(3):
+        scenarios.add(Scenario(cells=cells,
+                               heat_solver_rk_or_lbm=False,
+                               rk_solver_order=2,
+                               time_step_strategy="NormalTimestep",
+                               cuda_enabled_mpi=cuda_enabled_mpi,
+                               cuda_blocks=(64, 2, 1),
+                               timesteps=100))
+
+    # for rk_solver_order in (2, 4):
+    #     scenarios.add(Scenario(cells=cells,
+    #                            heat_solver_rk_or_lbm=True,
+    #                            rk_solver_order=rk_solver_order,
+    #                            time_step_strategy="NormalTimestep",
+    #                            cuda_enabled_mpi=cuda_enabled_mpi,
+    #                            cuda_blocks=(64, 2, 1),
+    #                            timesteps=100))
+
+
+def single_kernel_benchmark():
+    scenarios = wlb.ScenarioManager()
+    cuda_enabled_mpi = False
+
+    cells_per_block = [(i, i, i) for i in (384, )]
+    cuda_blocks = [(64, 1, 1), (128, 1, 1), (256, 1, 1),
+                   (64, 2, 1), (128, 2, 1),
+                   (64, 4, 1)]
+    for heat_solver_rk_or_lbm in (True, ):
+        for time_step_strategy in ("PhaseOnly", "HydroOnly", "ThermalOnly", "KernelOnly"):
+            for cells in cells_per_block:
+                for cuda_block_size in cuda_blocks:
+                    scenarios.add(Scenario(cells=cells,
+                                           heat_solver_rk_or_lbm=heat_solver_rk_or_lbm,
+                                           rk_solver_order=2,
+                                           time_step_strategy=time_step_strategy,
+                                           cuda_enabled_mpi=cuda_enabled_mpi,
+                                           cuda_blocks=cuda_block_size))
+
+
+# profiling()
+benchmark()
+# single_kernel_benchmark()
diff --git a/apps/showcases/Thermocapillary/droplet_migration2D.py b/apps/showcases/Thermocapillary/droplet_migration2D.py
new file mode 100755
index 0000000000000000000000000000000000000000..583296f57713a912763195343cceea2c2aa35ff5
--- /dev/null
+++ b/apps/showcases/Thermocapillary/droplet_migration2D.py
@@ -0,0 +1,215 @@
+import os
+import shutil
+
+import numpy as np
+from scipy import integrate
+from matplotlib import pyplot as plt
+import pandas as pd
+
+from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_parameters_droplet_migration
+
+import waLBerla as wlb
+from waLBerla.core_extension import makeSlice
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+
+class Scenario:
+    def __init__(self):
+        self.dropletRadius = 32
+        self.dropletMidPoint = (65, 0, 0)
+
+        # simulation parameters
+        self.cells = (8 * self.dropletRadius, 2 * self.dropletRadius, 1)
+        self.blocks = (1, 1, 1)
+        self.periodic = (1, 0, 0)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.initial_temperature = 0
+
+        self.contact_angle = os.environ.get('CONTACT_ANGLE', 90)
+
+        self.capillary_number = 0.01
+        self.reynolds_number = 0.16
+        self.marangoni_number = 0.08
+        self.peclet_number = 1.0
+
+        self.sigma_ref = 5e-3
+        self.heat_ratio = 1
+        self.interface_width = 4
+        self.viscosity_ratio = 1
+
+        self.parameters = calculate_parameters_droplet_migration(radius=self.dropletRadius,
+                                                                 reynolds_number=self.reynolds_number,
+                                                                 capillary_number=self.capillary_number,
+                                                                 marangoni_number=self.marangoni_number,
+                                                                 peclet_number=self.peclet_number,
+                                                                 viscosity_ratio=self.viscosity_ratio,
+                                                                 heat_ratio=self.heat_ratio,
+                                                                 interface_width=self.interface_width,
+                                                                 reference_surface_tension=self.sigma_ref)
+
+        self.timesteps = self.parameters.reference_time * 100
+        self.pre_thermal_timesteps = self.parameters.reference_time
+        self.vtkWriteFrequency = 0
+        self.dbWriteFrequency = self.parameters.reference_time
+
+        self.config_dict = self.config()
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'pre_thermal_timesteps': self.pre_thermal_timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'remainingTimeLoggerFrequency': 10.0,
+                'gpuBlockSize': (128,  1, 1),
+                'gpuEnabledMpi': False
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.parameters.density_heavy,
+                'density_gas': self.parameters.density_light,
+                'sigma_ref': self.parameters.sigma_ref,
+                'sigma_t': self.parameters.sigma_t,
+                'mobility': self.parameters.mobility,
+                'temperature_ref': self.parameters.tmp_ref,
+                'heat_conductivity_liquid': self.parameters.heat_conductivity_heavy,
+                'heat_conductivity_gas': self.parameters.heat_conductivity_light,
+                'relaxation_time_liquid': self.parameters.relaxation_time_heavy,
+                'relaxation_time_gas': self.parameters.relaxation_time_light,
+                'interface_thickness': self.parameters.interface_thickness,
+                'velocity_wall': 0.001,
+                'contact_angle': self.contact_angle
+            },
+            'BoundariesAllenCahn': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesHydro': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesThermal': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'DiffusionDirichlet'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'DiffusionDirichlet'},
+                    {'direction': 'W', 'walldistance': -1, 'flag': 'NeumannByCopy'},
+                    {'direction': 'E', 'walldistance': -1, 'flag': 'NeumannByCopy'},
+                ],
+            },
+            'Droplet': {
+                'dropletRadius': self.dropletRadius,
+                'dropletMidPoint': self.dropletMidPoint,
+            },
+            'HeatSource': {
+                'heatSourceMidPointOne': (181, 21, 0),
+                'heatSourceMidPointTwo': (181, 21, 0),
+                'ws': 6,
+                'sizeDiffusedHotspot': 8,
+                'maximumHeatFluxOne': 0.2,
+                'maximumHeatFluxTwo': 0
+            },
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, **kwargs):
+        t = kwargs['timeStep']
+        velocity_field_wlb = wlb.field.gather(blocks, 'vel', makeSlice[:, :, :])
+        temperature_field_wlb = wlb.field.gather(blocks, 'temperature', makeSlice[:, :, :])
+        phase_field_wlb = wlb.field.gather(blocks, 'phase', makeSlice[:, :, :])
+
+        if temperature_field_wlb:
+            velocity_field = np.asarray(velocity_field_wlb).squeeze()
+            temperature_field = np.asarray(temperature_field_wlb).squeeze()
+            phase_field = np.asarray(phase_field_wlb).squeeze()
+
+            path = f"results_contact_angle_{int(self.contact_angle)}_degree"
+
+            if t == 0:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                else:
+                    shutil.rmtree(path)
+                    os.makedirs(path)
+
+            xx, yy = np.meshgrid(np.arange(phase_field.shape[0]), np.arange(phase_field.shape[1]))
+
+            intx = integrate.cumtrapz(velocity_field[:, :, 1].T, xx, axis=1, initial=0)[0]
+            inty = integrate.cumtrapz(velocity_field[:, :, 0].T, yy, axis=0, initial=0)
+            psi1 = intx-inty
+            intx = integrate.cumtrapz(velocity_field[:, :, 1].T, xx, axis=1, initial=0)
+            inty = integrate.cumtrapz(velocity_field[:, :, 0].T, yy, axis=0, initial=0)[:, 0][:, None]
+            psi2 = intx - inty
+
+            psi = 0.5 * (psi1 + psi2)
+
+            fig, ax = plt.subplots()
+            fig.set_figheight(8)
+            fig.set_figwidth(32)
+            ax.contour(xx, yy, psi, levels=50, cmap="RdBu")
+            ax.contour(xx, yy, phase_field[:, :].T, levels=[0.5, ], colors=['k'], linewidths=[4, ])
+            ax.contour(xx, yy, temperature_field[:, :].T, colors=['g'])
+            plt.grid()
+            plt.ylim((0, phase_field.shape[1]))
+            plt.xlim((0, phase_field.shape[0]))
+            plt.yticks(fontsize=24)
+            plt.xticks(fontsize=24)
+
+            plt.savefig(f'{path}/droplet{t}.png', dpi=300)
+            plt.close('all')
+
+            location_of_droplet = np.where(phase_field > 0.5)
+            center_of_mass = np.mean(location_of_droplet, axis=1)
+
+            data = {'timestep': t}
+            data.update({'total_timesteps': self.timesteps})
+            data.update({'pre_thermal_timesteps': self.pre_thermal_timesteps})
+            data.update({'center_of_mass_x': center_of_mass[0]})
+            data.update({'center_of_mass_y': center_of_mass[1]})
+            data.update({'capillary_number': self.capillary_number})
+            data.update({'reynolds_number': self.reynolds_number})
+            data.update({'marangoni_number': self.marangoni_number})
+            data.update({'peclet_number': self.peclet_number})
+            data.update({'viscosity_ratio': self.viscosity_ratio})
+            data.update(self.config_dict['PhysicalParameters'])
+
+            stencil_phase = kwargs.get('stencil_phase')
+            stencil_hydro = kwargs.get('stencil_hydro')
+            stencil_thermal = kwargs.get('stencil_thermal')
+            collision_space_phase = kwargs.get('collision_space_phase')
+            collision_space_hydro = kwargs.get('collision_space_hydro')
+            collision_space_thermal = kwargs.get('collision_space_thermal')
+
+            data.update({'stencil_phase': stencil_phase})
+            data.update({'stencil_hydro': stencil_hydro})
+            data.update({'stencil_thermal': stencil_thermal})
+            data.update({'collision_space_phase': collision_space_phase})
+            data.update({'collision_space_hydro': collision_space_hydro})
+            data.update({'collision_space_thermal': collision_space_thermal})
+            data.update({'contact_angle': self.contact_angle})
+
+            sequenceValuesToScalars(data)
+
+            csv_file = f"{path}/results.csv"
+
+            df = pd.DataFrame.from_records([data])
+            if not os.path.isfile(csv_file):
+                df.to_csv(csv_file, index=False, sep=';')
+            else:
+                df.to_csv(csv_file, index=False, mode='a', header=False, sep=';')
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/Thermocapillary/droplet_migration3D.py b/apps/showcases/Thermocapillary/droplet_migration3D.py
new file mode 100755
index 0000000000000000000000000000000000000000..e89727a778ee2ca5d3d34a7bec11f7b97aa0e985
--- /dev/null
+++ b/apps/showcases/Thermocapillary/droplet_migration3D.py
@@ -0,0 +1,252 @@
+import os
+import shutil
+
+import numpy as np
+from scipy import integrate
+from matplotlib import pyplot as plt
+import pandas as pd
+
+from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_parameters_droplet_migration
+
+import waLBerla as wlb
+from waLBerla.core_extension import makeSlice
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+
+class Scenario:
+    def __init__(self):
+        self.dropletRadius = 32
+
+        self.cells = (6 * self.dropletRadius, 2 * self.dropletRadius, 4 * self.dropletRadius)
+        self.blocks = (2, 1, 2)
+        self.periodic = (1, 0, 0)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.dropletMidPoint = (65, 0, self.size[2] // 2)
+
+        self.initial_temperature = 0
+
+        self.contact_angle = os.environ.get('CONTACT_ANGLE', 90)
+        self.maximum_heat_flux = float(os.environ.get('MAXIMUM_HEAT_FLUX', 0.2))
+        single_heat_source = True
+
+        self.heat_source_z_shift = 0  # int(self.dropletRadius * (2.0/3.0))
+        self.maximum_heat_flux_1 = self.maximum_heat_flux
+        self.maximum_heat_flux_2 = 0 if single_heat_source else self.maximum_heat_flux
+
+        self.capillary_number = 0.01
+        self.reynolds_number = 0.16
+        self.marangoni_number = 0.08
+        self.peclet_number = 1.0
+
+        self.sigma_ref = 5e-3
+        self.heat_ratio = 1
+        self.interface_width = 4
+        self.viscosity_ratio = 1
+
+        self.parameters = calculate_parameters_droplet_migration(radius=self.dropletRadius,
+                                                                 reynolds_number=self.reynolds_number,
+                                                                 capillary_number=self.capillary_number,
+                                                                 marangoni_number=self.marangoni_number,
+                                                                 peclet_number=self.peclet_number,
+                                                                 viscosity_ratio=self.viscosity_ratio,
+                                                                 heat_ratio=self.heat_ratio,
+                                                                 interface_width=self.interface_width,
+                                                                 reference_surface_tension=self.sigma_ref)
+
+        self.timesteps = 1 + self.parameters.reference_time * 100
+        self.pre_thermal_timesteps = self.parameters.reference_time
+        self.vtkWriteFrequency = 0
+        self.dbWriteFrequency = self.parameters.reference_time
+
+        # use either finite difference (4th order RK scheme) or lattice Boltzmann to solve the heat equation
+        self.heat_solver_fd = True  # "lattice Boltzmann"
+        self.config_dict = self.config()
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'pre_thermal_timesteps': self.pre_thermal_timesteps,
+                'heat_solver_fd': self.heat_solver_fd,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'meshWriteFrequency': 0,
+                'remainingTimeLoggerFrequency': 60.0,
+                'gpuEnabledMpi': False
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.parameters.density_heavy,
+                'density_gas': self.parameters.density_light,
+                'sigma_ref': self.parameters.sigma_ref,
+                'sigma_t': self.parameters.sigma_t,
+                'mobility': self.parameters.mobility,
+                'temperature_ref': self.parameters.tmp_ref,
+                'heat_conductivity_liquid': self.parameters.heat_conductivity_heavy,
+                'heat_conductivity_gas': self.parameters.heat_conductivity_light,
+                'relaxation_time_liquid': self.parameters.relaxation_time_heavy,
+                'relaxation_time_gas': self.parameters.relaxation_time_light,
+                'interface_thickness': self.parameters.interface_thickness,
+                'velocity_wall': 0.001,
+                'contact_angle': self.contact_angle
+            },
+            'BoundariesAllenCahn': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesHydro': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesThermal': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'DiffusionDirichlet'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'DiffusionDirichlet'},
+                    {'direction': 'W', 'walldistance': -1, 'flag': 'NeumannByCopy'},
+                    {'direction': 'E', 'walldistance': -1, 'flag': 'NeumannByCopy'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'DiffusionDirichlet'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'DiffusionDirichlet'},
+                ],
+            },
+            'Droplet': {
+                'dropletRadius': self.dropletRadius,
+                'dropletMidPoint': self.dropletMidPoint,
+            },
+            'HeatSource': {
+                'heatSourceMidPointOne': (181, 21, self.size[2] // 2 + self.heat_source_z_shift),
+                'heatSourceMidPointTwo': (181, 21, self.size[2] // 2 - self.heat_source_z_shift),
+                'ws': 6,
+                'sizeDiffusedHotspot': 8,
+                'maximumHeatFluxOne': self.maximum_heat_flux_1,
+                'maximumHeatFluxTwo': self.maximum_heat_flux_2
+            },
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, **kwargs):
+        t = kwargs['timeStep']
+        velocity_field_wlb = wlb.field.gather(blocks, 'vel', makeSlice[:, :, self.size[2] // 2])
+        temperature_field_wlb_xy = wlb.field.gather(blocks, 'temperature', makeSlice[:, :, self.size[2] // 2])
+        temperature_field_wlb_xz = wlb.field.gather(blocks, 'temperature', makeSlice[:, 21, :])
+        phase_field_wlb = wlb.field.gather(blocks, 'phase', makeSlice[:, :, :])
+
+        if temperature_field_wlb_xy:
+            velocity_field = np.asarray(velocity_field_wlb).squeeze()
+            temperature_field_xy = np.asarray(temperature_field_wlb_xy).squeeze()
+            temperature_field_xz = np.asarray(temperature_field_wlb_xz).squeeze()
+            phase_field = np.asarray(phase_field_wlb).squeeze()
+            
+            var = int(100 * self.maximum_heat_flux)
+            path = f"results_contact_angle_{int(self.contact_angle)}_degree_heat_flux_{var}"
+
+            if t == 0:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                else:
+                    shutil.rmtree(path)
+                    os.makedirs(path)
+
+            location_of_droplet = np.where(phase_field > 0.5)
+            center_of_mass = np.mean(location_of_droplet, axis=1)
+
+            xx, yy = np.meshgrid(np.arange(phase_field.shape[0]), np.arange(phase_field.shape[1]))
+
+            intx = integrate.cumtrapz(velocity_field[:, :, 1].T, xx, axis=1, initial=0)[0]
+            inty = integrate.cumtrapz(velocity_field[:, :, 0].T, yy, axis=0, initial=0)
+            psi1 = intx-inty
+            intx = integrate.cumtrapz(velocity_field[:, :, 1].T, xx, axis=1, initial=0)
+            inty = integrate.cumtrapz(velocity_field[:, :, 0].T, yy, axis=0, initial=0)[:, 0][:, None]
+            psi2 = intx - inty
+
+            psi = 0.5 * (psi1 + psi2)
+
+            fig, ax = plt.subplots()
+            fig.set_figheight(4)
+            fig.set_figwidth(32)
+            ax.contour(xx, yy, psi, levels=50, cmap="RdBu")
+            ax.contour(xx, yy, phase_field[:, :, self.size[2] // 2].T, levels=[0.5, ], colors=['k'], linewidths=[4, ])
+            ax.contour(xx, yy, temperature_field_xy[:, :].T, colors=['g'])
+            plt.grid()
+            plt.xticks(fontsize=40)
+            plt.yticks(fontsize=40)
+            plt.ylim((0, phase_field.shape[1]))
+            plt.xlim((0, phase_field.shape[0]))
+
+            plt.savefig(f'{path}/dropletXY{t}.png', dpi=300)
+            plt.close('all')
+
+            xx, zz = np.meshgrid(np.arange(phase_field.shape[0]), np.arange(phase_field.shape[2]))
+
+            fig, ax = plt.subplots()
+            fig.set_figheight(16)
+            fig.set_figwidth(32)
+            ax.contour(xx, zz, phase_field[:, int(center_of_mass[1]), :].T, levels=[0.5, ], colors=['k'], linewidths=[4, ])
+            ax.contour(xx, zz, temperature_field_xz[:, :].T, colors=['g'])
+            plt.grid()
+            plt.xticks(fontsize=40)
+            plt.yticks(fontsize=40)
+            plt.ylim((0, phase_field.shape[2]))
+            plt.xlim((0, phase_field.shape[0]))
+
+            plt.savefig(f'{path}/dropletXZ{t}.png', dpi=300)
+            plt.close('all')
+
+            data = {'timestep': t}
+            data.update({'total_timesteps': self.timesteps})
+            data.update({'pre_thermal_timesteps': self.pre_thermal_timesteps})
+            data.update({'center_of_mass_x': center_of_mass[0]})
+            data.update({'center_of_mass_y': center_of_mass[1]})
+            data.update({'center_of_mass_z': center_of_mass[2]})
+            data.update({'capillary_number': self.capillary_number})
+            data.update({'reynolds_number': self.reynolds_number})
+            data.update({'marangoni_number': self.marangoni_number})
+            data.update({'peclet_number': self.peclet_number})
+            data.update({'viscosity_ratio': self.viscosity_ratio})
+            data.update(self.config_dict['PhysicalParameters'])
+            data.update(self.config_dict['Droplet'])
+            data.update(self.config_dict['HeatSource'])
+
+            stencil_phase = kwargs.get('stencil_phase')
+            stencil_hydro = kwargs.get('stencil_hydro')
+            stencil_thermal = kwargs.get('stencil_thermal')
+            collision_space_phase = kwargs.get('collision_space_phase')
+            collision_space_hydro = kwargs.get('collision_space_hydro')
+            collision_space_thermal = kwargs.get('collision_space_thermal')
+
+            data.update({'stencil_phase': stencil_phase})
+            data.update({'stencil_hydro': stencil_hydro})
+            data.update({'stencil_thermal': stencil_thermal})
+            data.update({'collision_space_phase': collision_space_phase})
+            data.update({'collision_space_hydro': collision_space_hydro})
+            data.update({'collision_space_thermal': collision_space_thermal})
+            data.update({'contact_angle': self.contact_angle})
+
+            sequenceValuesToScalars(data)
+
+            csv_file = f"{path}/results.csv"
+
+            df = pd.DataFrame.from_records([data])
+            if not os.path.isfile(csv_file):
+                df.to_csv(csv_file, index=False, sep=';')
+            else:
+                df.to_csv(csv_file, index=False, mode='a', header=False, sep=';')
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/Thermocapillary/microchannel2D.py b/apps/showcases/Thermocapillary/microchannel2D.py
new file mode 100755
index 0000000000000000000000000000000000000000..9b183ace47dee2573461b24eb79f177dd3874769
--- /dev/null
+++ b/apps/showcases/Thermocapillary/microchannel2D.py
@@ -0,0 +1,252 @@
+import os
+import shutil
+
+import numpy as np
+from matplotlib import pyplot as plt
+import pandas as pd
+
+from lbmpy.phasefield_allen_cahn.analytical import analytical_solution_microchannel
+
+import waLBerla as wlb
+from waLBerla.core_extension import makeSlice
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+
+class Scenario:
+    def __init__(self, heat_solver_rk_or_lbm, case):
+        self.reference_length = 256
+
+        # simulation parameters
+        self.timesteps = 1
+        self.pre_thermal_timesteps = 1
+        self.vtkWriteFrequency = 0
+        self.dbWriteFrequency = 10000
+
+        self.cells = (2 * self.reference_length, self.reference_length, 1)
+        self.blocks = (1, 1, 1)
+        self.domain_size = tuple([c * b for c, b in zip(self.cells, self.blocks)])
+        self.periodic = (1, 0, 0)
+
+        self.heat_solver_rk_or_lbm = heat_solver_rk_or_lbm
+        self.order_rk_solver = 4
+
+        self.case = case
+
+        if self.case == 1:
+            self.rho_h = 1.0
+            self.rho_l = 1.0
+            self.mu_l = 0.2
+            self.mu_h = 0.2
+
+            self.sigma_ref = 0.025
+            self.sigma_t = -5e-4
+
+            self.kappa_h = 0.2
+            self.kappa_l = 0.2
+            self.temperature_ref = 10
+            self.temperature_h = 20
+            self.temperature_l = 4
+
+            self.interface_thickness = 5
+            self.mobility = 0.05
+            self.velocity_wall = 0
+        else:
+            self.rho_h = 1.0
+            self.rho_l = 1.0
+            self.mu_l = 0.2
+            self.mu_h = 0.2
+
+            self.sigma_ref = 0.025
+            self.sigma_t = -5e-4
+
+            self.kappa_h = 0.04
+            self.kappa_l = 0.2
+            self.temperature_ref = 10
+            self.temperature_h = 20
+            self.temperature_l = 4
+
+            self.interface_thickness = 5
+            self.mobility = 0.05
+            self.velocity_wall = 0
+
+        x, y, u_x, u_y, t_a = analytical_solution_microchannel(self.reference_length,
+                                                               self.domain_size[0], self.domain_size[1],
+                                                               self.kappa_h, self.kappa_l,
+                                                               self.temperature_h, self.temperature_ref,
+                                                               self.temperature_l,
+                                                               self.sigma_t, self.mu_l)
+        self.x = x
+        self.y = y
+        self.u_x = u_x
+        self.u_y = u_y
+        self.t_a = t_a
+        l0 = self.reference_length
+        self.XX, self.YY = np.meshgrid(np.arange(self.domain_size[0]) - l0, np.arange(self.domain_size[1]) - l0 // 2)
+
+        self.config_dict = self.config()
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'pre_thermal_timesteps': self.pre_thermal_timesteps,
+                'HeatSolverRKOrLBM': self.heat_solver_rk_or_lbm,
+                'orderRKSolver': self.order_rk_solver,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'remainingTimeLoggerFrequency': 30.0,
+                'gpuBlockSize': (128, 1, 1),
+                'gpuEnabledMpi': False
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.rho_h,
+                'density_gas': self.rho_l,
+                'sigma_ref': self.sigma_ref,
+                'sigma_t': self.sigma_t,
+                'mobility': self.mobility,
+                'temperature_ref': self.temperature_ref,
+                'heat_conductivity_liquid': self.kappa_h,
+                'heat_conductivity_gas': self.kappa_l,
+                'relaxation_time_liquid': 3 * self.mu_h,
+                'relaxation_time_gas': 3 * self.mu_l,
+                'interface_thickness': self.interface_thickness,
+                'velocity_wall': self.velocity_wall
+            },
+            'BoundariesAllenCahn': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesHydro': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesThermal': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'DiffusionDirichletStatic'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'DiffusionDirichletDynamic'},
+                ],
+            },
+            'MicroChannel': {
+                'Th': self.temperature_h,
+                'T0': self.temperature_l,
+            }
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, **kwargs):
+        t = kwargs['timeStep']
+        velocity_field_wlb = wlb.field.gather(blocks, 'vel', makeSlice[:, :, :])
+        temperature_field_wlb = wlb.field.gather(blocks, 'temperature', makeSlice[:, :, :])
+
+        if temperature_field_wlb:
+            stencil_phase = kwargs.get('stencil_phase')
+            stencil_hydro = kwargs.get('stencil_hydro')
+            stencil_thermal = kwargs.get('stencil_thermal')
+            collision_space_phase = kwargs.get('collision_space_phase')
+            collision_space_hydro = kwargs.get('collision_space_hydro')
+            collision_space_thermal = kwargs.get('collision_space_thermal')
+            field_type = kwargs.get('field_type')
+            field_type_pdfs = kwargs.get('field_type_pdfs')
+            if self.heat_solver_rk_or_lbm:
+                path = f"results_{stencil_phase}_{stencil_hydro}_{collision_space_phase}_{collision_space_hydro}_RK{self.order_rk_solver}_{field_type}_{field_type_pdfs}_case_{self.case}"
+            else:
+                path = f"results_{stencil_phase}_{stencil_hydro}_{stencil_thermal}_" \
+                       f"{collision_space_phase}_{collision_space_hydro}_{collision_space_thermal}_{field_type}_{field_type_pdfs}_case_{self.case}"
+
+            if t == 0:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                else:
+                    shutil.rmtree(path)
+                    os.makedirs(path)
+
+            velocity_field = np.asarray(velocity_field_wlb).squeeze()
+            temperature_field = np.asarray(temperature_field_wlb).squeeze()
+            l_2_temp_num = 0.0
+            l_2_temp_den = 0.0
+            l_2_u_num = 0.0
+            l_2_u_den = 0.0
+            from math import sqrt
+            for x in range(self.domain_size[0]):
+                for y in range(self.domain_size[1]):
+                    u = sqrt(velocity_field[x, y, 0] ** 2 + velocity_field[x, y, 1] ** 2)
+                    u_den = sqrt(self.u_x.T[x, y] ** 2 + self.u_y.T[x, y] ** 2)
+                    l_2_temp_num += ((temperature_field[x, y] - self.t_a.T[x, y]) ** 2)
+                    l_2_temp_den += ((self.t_a.T[x, y]) ** 2)
+                    l_2_u_num += ((u - u_den) ** 2)
+                    l_2_u_den += (u_den ** 2)
+            l_2_t = sqrt(l_2_temp_num / l_2_temp_den)
+            l_2_u = sqrt(l_2_u_num / l_2_u_den)
+
+            fig, ax = plt.subplots()
+            fig.set_figheight(5)
+            fig.set_figwidth(10)
+            levels = range(11, 24)
+            plt.contour(self.x, self.y, self.t_a, linestyles='dashed', levels=levels, colors=['k'])
+            plt.grid()
+            contour_2 = plt.contour(self.XX, self.YY, temperature_field.T, levels=levels, colors=['k'])
+            clabels = ax.clabel(contour_2, inline=1, fontsize=10, fmt='%2.0lf')
+            [txt.set_bbox(dict(facecolor='white', edgecolor='none', pad=0)) for txt in clabels]
+            plt.ylim((-128, 128))
+            plt.xlim((-256, 256))
+            plt.savefig(f'{path}/temperature_{t}.png', dpi=300)
+            plt.close('all')
+
+            fig1, ax = plt.subplots()
+            fig1.set_figheight(5)
+            fig1.set_figwidth(10)
+            n = 15
+            ax.quiver(self.x[::n, ::n] + 1.1, self.y[::n, ::n] - 2.5, self.u_x.T[::n, ::n].T, self.u_y.T[::n, ::n].T,
+                      angles='xy', scale_units='xy', scale=0.00001, color='r')
+            # c = np.sqrt(velocity_field[::n, ::n, 0].T * velocity_field[::n, ::n, 0].T +
+            #             velocity_field[::n, ::n, 1].T * velocity_field[::n, ::n, 1].T)
+            ax.quiver(self.XX[::n, ::n] + 1.1, self.YY[::n, ::n] - 2,
+                      velocity_field[::n, ::n, 0].T, velocity_field[::n, ::n, 1].T,
+                      angles='xy', scale_units='xy', scale=0.00001, color='b')
+            plt.grid()
+            plt.ylim((-128, 128))
+            plt.xlim((-256, 256))
+            plt.savefig(f'{path}/velocity_{t}.png', dpi=300)
+            plt.close('all')
+
+            data = {'timestep': t}
+            data.update({"L2_T": l_2_t})
+            data.update({"L2_U": l_2_u})
+            data.update({'total_timesteps': self.timesteps})
+            data.update({'pre_thermal_timesteps': self.pre_thermal_timesteps})
+            data.update({'stencil_phase': stencil_phase})
+            data.update({'stencil_hydro': stencil_hydro})
+            if not self.heat_solver_rk_or_lbm:
+                data.update({'stencil_thermal': stencil_thermal})
+            data.update({'collision_space_phase': collision_space_phase})
+            data.update({'collision_space_hydro': collision_space_hydro})
+            if not self.heat_solver_rk_or_lbm:
+                data.update({'collision_space_thermal': collision_space_thermal})
+            data.update(self.config_dict['PhysicalParameters'])
+
+            sequenceValuesToScalars(data)
+
+            csv_file = f"{path}/results.csv"
+
+            df = pd.DataFrame.from_records([data])
+            if not os.path.isfile(csv_file):
+                df.to_csv(csv_file, index=False, sep=';')
+            else:
+                df.to_csv(csv_file, index=False, mode='a', header=False, sep=';')
+
+
+scenarios = wlb.ScenarioManager()
+# scenarios.add(Scenario(heat_solver_rk_or_lbm=False, case=1))
+scenarios.add(Scenario(heat_solver_rk_or_lbm=True, case=1))
+# scenarios.add(Scenario(heat_solver_rk_or_lbm=False, case=2))
+scenarios.add(Scenario(heat_solver_rk_or_lbm=True, case=2))
diff --git a/apps/showcases/Thermocapillary/microchannel3D.py b/apps/showcases/Thermocapillary/microchannel3D.py
new file mode 100755
index 0000000000000000000000000000000000000000..0726d7114ed6cb60c759ba090e694dabff1edba8
--- /dev/null
+++ b/apps/showcases/Thermocapillary/microchannel3D.py
@@ -0,0 +1,252 @@
+import os
+import shutil
+
+import numpy as np
+from matplotlib import pyplot as plt
+import pandas as pd
+
+from lbmpy.phasefield_allen_cahn.analytical import analytical_solution_microchannel
+
+import waLBerla as wlb
+from waLBerla.core_extension import makeSlice
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+
+class Scenario:
+    def __init__(self, heat_solver_rk_or_lbm, case):
+        self.reference_length = 256
+
+        # simulation parameters
+        self.timesteps = 400001
+        self.pre_thermal_timesteps = 100000
+        self.vtkWriteFrequency = 0
+        self.dbWriteFrequency = 10000
+
+        self.cells = (self.reference_length, self.reference_length, 10)
+        self.blocks = (2, 1, 1)
+        self.domain_size = tuple([c * b for c, b in zip(self.cells, self.blocks)])
+        self.periodic = (1, 0, 1)
+
+        self.heat_solver_rk_or_lbm = heat_solver_rk_or_lbm
+
+        self.case = case
+
+        if self.case == 1:
+            self.rho_h = 1.0
+            self.rho_l = 1.0
+            self.mu_l = 0.2
+            self.mu_h = 0.2
+
+            self.sigma_ref = 0.025
+            self.sigma_t = -5e-4
+
+            self.kappa_h = 0.2
+            self.kappa_l = 0.2
+            self.temperature_ref = 10
+            self.temperature_h = 20
+            self.temperature_l = 4
+
+            self.interface_thickness = 5
+            self.mobility = 0.05
+            self.velocity_wall = 0
+        else:
+            self.rho_h = 1.0
+            self.rho_l = 1.0
+            self.mu_l = 0.2
+            self.mu_h = 0.2
+
+            self.sigma_ref = 0.025
+            self.sigma_t = -5e-4
+
+            self.kappa_h = 0.04
+            self.kappa_l = 0.2
+            self.temperature_ref = 10
+            self.temperature_h = 20
+            self.temperature_l = 4
+
+            self.interface_thickness = 5
+            self.mobility = 0.05
+            self.velocity_wall = 0
+
+        x, y, u_x, u_y, t_a = analytical_solution_microchannel(self.reference_length,
+                                                               self.domain_size[0], self.domain_size[1],
+                                                               self.kappa_h, self.kappa_l,
+                                                               self.temperature_h, self.temperature_ref,
+                                                               self.temperature_l,
+                                                               self.sigma_t, self.mu_l)
+        self.x = x
+        self.y = y
+        self.u_x = u_x
+        self.u_y = u_y
+        self.t_a = t_a
+        l0 = self.reference_length
+        self.XX, self.YY = np.meshgrid(np.arange(self.domain_size[0]) - l0, np.arange(self.domain_size[1]) - l0 // 2)
+
+        self.config_dict = self.config()
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'pre_thermal_timesteps': self.pre_thermal_timesteps,
+                'HeatSolverRKOrLBM': self.heat_solver_rk_or_lbm,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'remainingTimeLoggerFrequency': 30.0,
+                'gpuBlockSize': (128, 1, 1),
+                'gpuEnabledMpi': False
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.rho_h,
+                'density_gas': self.rho_l,
+                'sigma_ref': self.sigma_ref,
+                'sigma_t': self.sigma_t,
+                'mobility': self.mobility,
+                'temperature_ref': self.temperature_ref,
+                'heat_conductivity_liquid': self.kappa_h,
+                'heat_conductivity_gas': self.kappa_l,
+                'relaxation_time_liquid': 3 * self.mu_h,
+                'relaxation_time_gas': 3 * self.mu_l,
+                'interface_thickness': self.interface_thickness,
+                'velocity_wall': self.velocity_wall
+            },
+            'BoundariesAllenCahn': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesHydro': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'BoundariesThermal': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'DiffusionDirichletStatic'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'DiffusionDirichletDynamic'},
+                ],
+            },
+            'MicroChannel': {
+                'Th': self.temperature_h,
+                'T0': self.temperature_l,
+            }
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, **kwargs):
+        t = kwargs['timeStep']
+        velocity_field_wlb = wlb.field.gather(blocks, 'vel', makeSlice[:, :, self.domain_size[2] // 2])
+        temperature_field_wlb = wlb.field.gather(blocks, 'temperature', makeSlice[:, :, self.domain_size[2] // 2])
+
+        if temperature_field_wlb:
+            stencil_phase = kwargs.get('stencil_phase')
+            stencil_hydro = kwargs.get('stencil_hydro')
+            stencil_thermal = kwargs.get('stencil_thermal')
+            collision_space_phase = kwargs.get('collision_space_phase')
+            collision_space_hydro = kwargs.get('collision_space_hydro')
+            collision_space_thermal = kwargs.get('collision_space_thermal')
+            field_type = kwargs.get('field_type')
+            field_type_pdfs = kwargs.get('field_type_pdfs')
+            if self.heat_solver_rk_or_lbm:
+                path = f"results_{stencil_phase}_{stencil_hydro}_{collision_space_phase}_{collision_space_hydro}_RK2_{field_type}_{field_type_pdfs}_case_{self.case}"
+            else:
+                path = f"results_{stencil_phase}_{stencil_hydro}_{stencil_thermal}_" \
+                       f"{collision_space_phase}_{collision_space_hydro}_{collision_space_thermal}_{field_type}_{field_type_pdfs}_case_{self.case}"
+
+            if t == 0:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                else:
+                    shutil.rmtree(path)
+                    os.makedirs(path)
+
+            velocity_field = np.asarray(velocity_field_wlb).squeeze()
+            temperature_field = np.asarray(temperature_field_wlb).squeeze()
+            l_2_temp_num = 0.0
+            l_2_temp_den = 0.0
+            l_2_u_num = 0.0
+            l_2_u_den = 0.0
+            from math import sqrt
+            for x in range(self.domain_size[0]):
+                for y in range(self.domain_size[1]):
+                    u = sqrt(velocity_field[x, y, 0] ** 2 + velocity_field[x, y, 1] ** 2)
+                    u_den = sqrt(self.u_x.T[x, y] ** 2 + self.u_y.T[x, y] ** 2)
+                    l_2_temp_num += ((temperature_field[x, y] - self.t_a.T[x, y]) ** 2)
+                    l_2_temp_den += ((self.t_a.T[x, y]) ** 2)
+                    l_2_u_num += ((u - u_den) ** 2)
+                    l_2_u_den += (u_den ** 2)
+            l_2_t = sqrt(l_2_temp_num / l_2_temp_den)
+            l_2_u = sqrt(l_2_u_num / l_2_u_den)
+
+            fig, ax = plt.subplots()
+            fig.set_figheight(5)
+            fig.set_figwidth(10)
+            levels = range(11, 24)
+            plt.contour(self.x, self.y, self.t_a, linestyles='dashed', levels=levels, colors=['k'])
+            plt.grid()
+            contour_2 = plt.contour(self.XX, self.YY, temperature_field.T, levels=levels, colors=['k'])
+            clabels = ax.clabel(contour_2, inline=1, fontsize=10, fmt='%2.0lf')
+            [txt.set_bbox(dict(facecolor='white', edgecolor='none', pad=0)) for txt in clabels]
+            plt.ylim((-128, 128))
+            plt.xlim((-256, 256))
+            plt.savefig(f'{path}/temperature_{t}.png', dpi=300)
+            plt.close('all')
+
+            fig1, [ax1, ax2] = plt.subplots(1, 2)
+            fig1.set_figheight(5)
+            fig1.set_figwidth(20)
+            n = 10
+            ax2.quiver(self.x[::n, ::n] + 1.1, self.y[::n, ::n] - 2.5, self.u_x.T[::n, ::n].T, self.u_y.T[::n, ::n].T,
+                       angles='xy', scale_units='xy', scale=0.00001, color='r')
+            ax2.set_title("analytic")
+            c = np.sqrt(velocity_field[::n, ::n, 0].T * velocity_field[::n, ::n, 0].T +
+                        velocity_field[::n, ::n, 1].T * velocity_field[::n, ::n, 1].T)
+            ax1.quiver(self.XX[::n, ::n] + 1.1, self.YY[::n, ::n] - 2,
+                       velocity_field[::n, ::n, 0].T, velocity_field[::n, ::n, 1].T, c,
+                       angles='xy', scale_units='xy', scale=0.00001)
+            ax1.set_title("lattice Boltzmann")
+            plt.grid()
+            plt.ylim((-128, 128))
+            plt.xlim((-256, 256))
+            plt.savefig(f'{path}/velocity_{t}.png', dpi=300)
+            plt.close('all')
+
+            data = {'timestep': t}
+            data.update({"L2_T": l_2_t})
+            data.update({"L2_U": l_2_u})
+            data.update({'total_timesteps': self.timesteps})
+            data.update({'pre_thermal_timesteps': self.pre_thermal_timesteps})
+            data.update({'stencil_phase': stencil_phase})
+            data.update({'stencil_hydro': stencil_hydro})
+            if not self.heat_solver_rk_or_lbm:
+                data.update({'stencil_thermal': stencil_thermal})
+            data.update({'collision_space_phase': collision_space_phase})
+            data.update({'collision_space_hydro': collision_space_hydro})
+            if not self.heat_solver_rk_or_lbm:
+                data.update({'collision_space_thermal': collision_space_thermal})
+            data.update(self.config_dict['PhysicalParameters'])
+
+            sequenceValuesToScalars(data)
+
+            csv_file = f"{path}/results.csv"
+
+            df = pd.DataFrame.from_records([data])
+            if not os.path.isfile(csv_file):
+                df.to_csv(csv_file, index=False, sep=';')
+            else:
+                df.to_csv(csv_file, index=False, mode='a', header=False, sep=';')
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario(heat_solver_rk_or_lbm=False, case=1))
+# scenarios.add(Scenario(heat_solver_rk_or_lbm=True, case=1))
+# scenarios.add(Scenario(heat_solver_rk_or_lbm=True, case=2))
+scenarios.add(Scenario(heat_solver_rk_or_lbm=False, case=2))
diff --git a/apps/showcases/Thermocapillary/thermocapillary.cpp b/apps/showcases/Thermocapillary/thermocapillary.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd8b5d155b9043e0d0df492d3820ab1a3a0aa4ea
--- /dev/null
+++ b/apps/showcases/Thermocapillary/thermocapillary.cpp
@@ -0,0 +1,1132 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file thermocapillary.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+#include "core/math/IntegerFactorization.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+#include "gpu/GPURAII.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/ErrorChecking.h"
+#include "gpu/AddGPUFieldToStorage.h"
+#include "gpu/DeviceSelectMPI.h"
+#include "gpu/communication/UniformGPUScheme.h"
+#endif
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+#include "geometry/mesh/TriangleMeshIO.h"
+
+#include "lbm/PerformanceEvaluation.h"
+
+#include "postprocessing/FieldToSurfaceMesh.h"
+
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/PythonCallback.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "GenDefines.h"
+#include "InitializerFunctions.h"
+#include "PythonExports.h"
+
+////////////
+// USING //
+//////////
+
+using namespace std::placeholders;
+using namespace walberla;
+
+using flag_t      = walberla::uint8_t;
+using FlagField_T = FlagField< flag_t >;
+
+auto DiffusionCallback = [](const Cell& pos, const shared_ptr< StructuredBlockForest >& blocks, IBlock& block,
+                            const real_t Th, const real_t T0) {
+
+      auto x_half = real_c(blocks->getDomainCellBB().xMax()) / real_c(2.0);
+      Cell global_cell;
+      blocks->transformBlockLocalToGlobalCell(global_cell, block, pos);
+
+      return Th + T0 * cos( (math::pi / x_half) * (real_c(global_cell[0]) - x_half) );
+};
+
+int main(int argc, char** argv)
+{
+   mpi::Environment const Env(argc, argv);
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+   gpu::selectDeviceBasedOnMpiRank();
+#endif
+   exportDataStructuresToPython();
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      WALBERLA_MPI_WORLD_BARRIER()
+
+      auto config = *cfg;
+      logging::configureLogging(config);
+
+      auto DomainSetup = config->getOneBlock("DomainSetup");
+      const Vector3<bool> periodic = DomainSetup.getParameter<Vector3<bool>>("periodic");
+      const bool weakScaling = DomainSetup.getParameter<bool>("weakScaling", false); // weak or strong scaling
+
+      const uint_t totalNumProcesses = uint_c(MPIManager::instance()->numProcesses());
+
+      Vector3<uint_t> cellsPerBlock;
+      Vector3<uint_t> NumberOfBlocks;
+      Vector3<uint_t> numProcesses;
+
+      if (!DomainSetup.isDefined("blocks"))
+      {
+         if (weakScaling)
+         {
+            const Vector3<uint_t> cells = DomainSetup.getParameter<Vector3<uint_t> >("cellsPerBlock");
+            blockforest::calculateCellDistribution(cells, totalNumProcesses, NumberOfBlocks, cellsPerBlock);
+            cellsPerBlock = cells;
+            numProcesses = NumberOfBlocks;
+         }
+         else
+         {
+            cellsPerBlock = DomainSetup.getParameter<Vector3<uint_t> >("cellsPerBlock");
+            const Vector3<uint_t> domainSize = DomainSetup.getParameter<Vector3<uint_t> >("domainSize");
+            std::vector< uint_t > tmp = math::getFactors( totalNumProcesses, 3);
+            // Round up to be divisible by eight (SIMD)
+            for (uint_t i = 0; i < 3; ++i)
+            {
+               NumberOfBlocks[i] = uint_c(std::ceil(double_c(domainSize[i]) / double_c(cellsPerBlock[i])));
+               numProcesses[i] = tmp[i];
+            }
+         }
+      }
+      else
+      {
+         cellsPerBlock = DomainSetup.getParameter<Vector3<uint_t>>("cellsPerBlock");
+         NumberOfBlocks = DomainSetup.getParameter<Vector3<uint_t>>("blocks");
+         numProcesses = NumberOfBlocks;
+      }
+
+      if ((NumberOfBlocks[0] * NumberOfBlocks[1] * NumberOfBlocks[2]) % totalNumProcesses != 0) {
+         WALBERLA_ABORT("The total number of blocks is " << NumberOfBlocks[0] * NumberOfBlocks[1] * NumberOfBlocks[2]
+                                                         << " they can not be equally distributed on " << totalNumProcesses
+                                                         << " Processes")
+      }
+
+      auto aabb = AABB(real_c(0), real_c(0), real_c(0), real_c(NumberOfBlocks[0] * cellsPerBlock[0]),
+                       real_c(NumberOfBlocks[1] * cellsPerBlock[1]),
+                       real_c(NumberOfBlocks[2] * cellsPerBlock[2]));
+
+      auto blocks = blockforest::createUniformBlockGrid( aabb,
+                                                        NumberOfBlocks[0], NumberOfBlocks[1], NumberOfBlocks[2],
+                                                        cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2],
+                                                        numProcesses[0], numProcesses[1], numProcesses[2],
+                                                        periodic[0], periodic[1], periodic[2], //periodicity
+                                                        false // keepGlobalBlockInformation
+      );
+      blocks->createCellBoundingBoxes();
+
+
+      ///////////////////////////////////
+      // GENERAL SIMULATION PARAMETERS //
+      //////////////////////////////////
+
+      auto parameters                  = config->getOneBlock("Parameters");
+      const uint_t timesteps           = parameters.getParameter< uint_t >("timesteps");
+      const uint_t thermal_timesteps   = parameters.getParameter< uint_t >("pre_thermal_timesteps", uint_c(0));
+      const bool heat_solver_RK_or_LBM = parameters.getParameter< bool >("HeatSolverRKOrLBM", false);
+      const uint_t orderRKSolver = parameters.getParameter< uint_t >("orderRKSolver", 2);
+      const real_t remaining_time_logger_frequency = parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(0.0));
+
+
+      /////////////////////////
+      // ADD DATA TO BLOCKS //
+      ///////////////////////
+      BlockDataID velocity_field_ID = field::addToStorage< VectorField_T >(blocks, "vel", real_c(0.0), field::fzyx);
+      BlockDataID phase_field_ID    = field::addToStorage< ScalarField_T >(blocks, "phase", real_c(0.0), field::fzyx);
+      BlockDataID temperature_field_ID = field::addToStorage< ScalarField_T >(blocks, "temperature", real_c(0.0), field::fzyx);
+
+      BlockDataID temperature_PDFs_ID;
+      BlockDataID rk_field_one_ID;
+      BlockDataID rk_field_two_ID;
+      BlockDataID rk_field_three_ID;
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      BlockDataID phase_field_gpu = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, phase_field_ID, "phase field on GPU", true);
+      BlockDataID phase_field_tmp = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, phase_field_ID, "temporary phasefield", true);
+
+      BlockDataID vel_field_gpu =
+         gpu::addGPUFieldToStorage< VectorField_T >(blocks, velocity_field_ID, "velocity field on GPU", true);
+      BlockDataID temperature_field_gpu =
+         gpu::addGPUFieldToStorage< ScalarField_T >(blocks, temperature_field_ID, "temperature field on GPU", true);
+
+      const BlockDataID allen_cahn_PDFs_ID = gpu::addGPUFieldToStorage< GPUFieldPDFs >(
+         blocks, "lb phase field on GPU", Stencil_phase_T::Size, field::fzyx, uint_c(1));
+      const BlockDataID hydrodynamic_PDFs_ID = gpu::addGPUFieldToStorage< GPUFieldPDFs >(
+         blocks, "lb velocity field on GPU", Stencil_hydro_T::Size, field::fzyx, uint_c(1));
+
+      if(heat_solver_RK_or_LBM)
+      {
+         rk_field_one_ID = gpu::addGPUFieldToStorage< gpu::GPUField< real_t > >(blocks, "RK1", 1, field::fzyx, 1);
+         rk_field_two_ID = gpu::addGPUFieldToStorage< gpu::GPUField< real_t > >(blocks, "RK2", 1, field::fzyx, 1);
+         rk_field_three_ID = gpu::addGPUFieldToStorage< gpu::GPUField< real_t > >(blocks, "RK3", 1, field::fzyx, 1);
+      }
+      else
+      {
+         temperature_PDFs_ID = gpu::addGPUFieldToStorage< GPUFieldPDFs >(
+            blocks, "lb temperature field on GPU", Stencil_thermal_T::Size, field::fzyx, uint_c(1));
+      }
+#else
+      BlockDataID allen_cahn_PDFs_ID =
+         field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", PdfField_phase_T::value_type(0.0), field::fzyx);
+      BlockDataID hydrodynamic_PDFs_ID =
+         field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", PdfField_hydro_T::value_type(0.0), field::fzyx);
+      BlockDataID phase_field_tmp_ID    = field::addToStorage< ScalarField_T >(blocks, "phase tmp", real_c(0.0), field::fzyx);
+
+      if(heat_solver_RK_or_LBM)
+      {
+         rk_field_one_ID = field::addToStorage< ScalarField_T >(blocks, "RK1", real_c(0.0), field::fzyx);
+         rk_field_two_ID = field::addToStorage< ScalarField_T >(blocks, "RK2", real_c(0.0), field::fzyx);
+         rk_field_three_ID = field::addToStorage< ScalarField_T >(blocks, "RK3", real_c(0.0), field::fzyx);
+      }
+      else
+      {
+         temperature_PDFs_ID = field::addToStorage< PdfField_thermal_T >(blocks, "lb temperature field", PdfField_thermal_T::value_type(0.0), field::fzyx);
+      }
+
+#endif
+
+      const BlockDataID flag_field_allen_cahn_LB_ID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field phase");
+      const BlockDataID flag_field_hydrodynamic_LB_ID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field hydro");
+      const BlockDataID flag_field_thermal_LB_ID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field thermal");
+
+      auto physical_parameters  = config->getOneBlock("PhysicalParameters");
+      const real_t density_liquid           = physical_parameters.getParameter< real_t >("density_liquid");
+      const real_t density_gas              = physical_parameters.getParameter< real_t >("density_gas");
+      const real_t sigma_ref                = physical_parameters.getParameter< real_t >("sigma_ref");
+      const real_t sigma_t                  = physical_parameters.getParameter< real_t >("sigma_t");
+      const real_t mobility                 = physical_parameters.getParameter< real_t >("mobility");
+      const real_t temperature_ref          = physical_parameters.getParameter< real_t >("temperature_ref");
+      const real_t heat_conductivity_liquid = physical_parameters.getParameter< real_t >("heat_conductivity_liquid");
+      const real_t heat_conductivity_gas    = physical_parameters.getParameter< real_t >("heat_conductivity_gas");
+      const real_t relaxation_time_liquid   = physical_parameters.getParameter< real_t >("relaxation_time_liquid");
+      const real_t relaxation_time_gas      = physical_parameters.getParameter< real_t >("relaxation_time_gas");
+      const real_t interface_thickness      = physical_parameters.getParameter< real_t >("interface_thickness");
+      const real_t velocity_wall            = physical_parameters.getParameter< real_t >("velocity_wall");
+      const real_t angle                    = physical_parameters.getParameter< real_t >("contact_angle", real_c(90));
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(128, 1, 1));
+#endif
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the phase-field")
+      auto t_h = real_c(0.0);
+      auto t_0 = real_c(0.0);
+
+#ifdef GENERATED_HEAT_SOURCE
+      auto ws = real_c(0.0);
+      auto ds = real_c(0.0);
+      auto qsOne = real_c(0.0);
+      auto qsTwo = real_c(0.0);
+      Vector3< int64_t > heatSourceMidPointOne(0, 0, 0);
+      Vector3< int64_t > heatSourceMidPointTwo(0, 0, 0);
+#endif
+
+      bool benchmark = false;
+      std::string timestep_strategy = "NormalTimestep";
+      if (config->getNumBlocks("Droplet") == 1)
+      {
+         if(!GeneratedHeatSource)
+            WALBERLA_ABORT("For the Droplet application 'heat_source' has to be activated in the generation script")
+         if(heat_solver_RK_or_LBM)
+            WALBERLA_ABORT("Runge Kutta method is only available for the MicroChannel test case so far")
+         auto droplet_parameters    = config->getOneBlock("Droplet");
+         const real_t droplet_radius = droplet_parameters.getParameter< real_t >("dropletRadius");
+         const Vector3< real_t > droplet_midpoint =
+            droplet_parameters.getParameter< Vector3< real_t > >("dropletMidPoint");
+         initPhaseFieldDroplet(blocks, phase_field_ID, droplet_radius, droplet_midpoint, interface_thickness);
+
+#ifdef GENERATED_HEAT_SOURCE
+         auto HeatSource    = config->getOneBlock("HeatSource");
+         ws = HeatSource.getParameter< real_t >("ws");
+         ds = HeatSource.getParameter< real_t >("sizeDiffusedHotspot");
+
+         qsOne = HeatSource.getParameter< real_t >("maximumHeatFluxOne");
+         qsTwo = HeatSource.getParameter< real_t >("maximumHeatFluxTwo");
+
+         heatSourceMidPointOne = HeatSource.getParameter< Vector3< int64_t > >("heatSourceMidPointOne");
+         heatSourceMidPointTwo = HeatSource.getParameter< Vector3< int64_t > >("heatSourceMidPointTwo");
+#endif
+      }
+      else if (config->getNumBlocks("MicroChannel") == 1)
+      {
+         if(GeneratedHeatSource)
+            WALBERLA_ABORT("For the MicroChannel application 'heat_source' has to be deactivated in the generation script")
+         auto micro_channel_parameters = config->getOneBlock("MicroChannel");
+         t_h                         = micro_channel_parameters.getParameter< real_t >("Th");
+         t_0                         = micro_channel_parameters.getParameter< real_t >("T0");
+         initMicroChannel(blocks, phase_field_ID, temperature_field_ID, t_h, t_0, temperature_ref, interface_thickness);
+      }
+      else if (config->getNumBlocks("Benchmark") == 1)
+      {
+         auto benchmark_parameters = config->getOneBlock("Benchmark");
+         benchmark = true;
+         timestep_strategy = benchmark_parameters.getParameter<std::string>("timeStepStrategy", "NormalTimestep");
+         WALBERLA_LOG_INFO_ON_ROOT("Benchmarking Thermocapillary flows with: " << timestep_strategy)
+      }
+      else
+      {
+         WALBERLA_ABORT("Only Droplet or MicroChannel or Benchmark Scenario is available. Thus a Parameter Block for these "
+                        "Scenarios must exist!")
+      }
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the phase-field done")
+
+      /////////////////
+      // ADD SWEEPS //
+      ///////////////
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+
+      int streamHighPriority = 0;
+      int streamLowPriority = 0;
+      WALBERLA_GPU_CHECK( gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority) );
+      auto defaultStream = gpu::StreamRAII::newPriorityStream( streamLowPriority );
+
+      pystencils::initialize_phase_field_distributions init_h(allen_cahn_PDFs_ID, phase_field_gpu, vel_field_gpu,
+                                                              interface_thickness);
+      pystencils::initialize_velocity_based_distributions init_g(hydrodynamic_PDFs_ID, vel_field_gpu);
+      pystencils::initialize_thermal_distributions init_f(temperature_PDFs_ID, temperature_field_gpu, vel_field_gpu);
+
+
+      pystencils::phase_field_LB_step phase_field_LB_step(allen_cahn_PDFs_ID, phase_field_gpu, phase_field_tmp, vel_field_gpu, mobility,
+                                                          interface_thickness, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::hydro_LB_step hydro_LB_step(hydrodynamic_PDFs_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu,
+                                              temperature_ref, interface_thickness, density_liquid, density_gas,
+                                              sigma_t, sigma_ref, relaxation_time_liquid, relaxation_time_gas, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+
+#if defined(GENERATED_HEAT_SOURCE)
+      pystencils::thermal_LB_step thermal_LB_step(temperature_PDFs_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu, ds,
+                                                  heatSourceMidPointOne[0], heatSourceMidPointOne[1], heatSourceMidPointOne[2],
+                                                  heatSourceMidPointTwo[0], heatSourceMidPointTwo[1], heatSourceMidPointTwo[2],
+                                                  heat_conductivity_liquid, heat_conductivity_gas,
+                                                  qsOne, qsTwo, ws,
+                                                  gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+#else
+      pystencils::thermal_LB_step thermal_LB_step(temperature_PDFs_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+#endif
+      pystencils::initialize_rk2 initRK2(rk_field_one_ID, temperature_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::rk2_first_stage RK2FirstStage(rk_field_one_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu, heat_conductivity_liquid,
+                                                heat_conductivity_gas, density_liquid, density_gas, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::rk2_second_stage RK2SecondStage(rk_field_one_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas,
+                                                  gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+
+      pystencils::initialize_rk4 initRK4(rk_field_one_ID, rk_field_two_ID, rk_field_three_ID, temperature_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::rk4_first_stage RK4FirstStage(rk_field_one_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu, heat_conductivity_liquid,
+                                                heat_conductivity_gas, density_liquid, density_gas,
+                                                gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::rk4_second_stage RK4SecondStage(rk_field_one_ID, rk_field_two_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas,
+                                                  gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::rk4_third_stage RK4ThirdStage(rk_field_two_ID, rk_field_three_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu,
+                                                heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas,
+                                                gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+      pystencils::rk4_fourth_stage RK4FourthStage(rk_field_one_ID, rk_field_two_ID, rk_field_three_ID, phase_field_gpu, temperature_field_gpu, vel_field_gpu,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas,
+                                                  gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+
+      auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
+      {
+            auto phaseField = b->getData< gpu::GPUField<real_t> >(phase_field_gpu);
+            auto phaseFieldTMP = b->getData< gpu::GPUField<real_t> >(phase_field_tmp);
+            phaseField->swapDataPointers(phaseFieldTMP);
+      });
+
+#else
+      pystencils::initialize_phase_field_distributions init_h(allen_cahn_PDFs_ID, phase_field_ID, velocity_field_ID,
+                                                              interface_thickness);
+      pystencils::initialize_velocity_based_distributions init_g(hydrodynamic_PDFs_ID, velocity_field_ID);
+      pystencils::initialize_thermal_distributions init_f(temperature_PDFs_ID, temperature_field_ID, velocity_field_ID);
+
+      pystencils::phase_field_LB_step phase_field_LB_step(allen_cahn_PDFs_ID, phase_field_ID, phase_field_tmp_ID, velocity_field_ID, mobility,
+                                                          interface_thickness);
+      pystencils::hydro_LB_step hydro_LB_step(hydrodynamic_PDFs_ID, phase_field_ID, temperature_field_ID, velocity_field_ID,
+                                              temperature_ref, interface_thickness, density_liquid, density_gas,
+                                              sigma_t, sigma_ref, relaxation_time_liquid, relaxation_time_gas);
+#if defined(GENERATED_HEAT_SOURCE)
+      pystencils::thermal_LB_step thermal_LB_step(temperature_PDFs_ID, phase_field_ID, temperature_field_ID, velocity_field_ID, ds,
+                                                  heatSourceMidPointOne[0], heatSourceMidPointOne[1], heatSourceMidPointOne[2],
+                                                  heatSourceMidPointTwo[0], heatSourceMidPointTwo[1], heatSourceMidPointTwo[2],
+                                                  heat_conductivity_liquid, heat_conductivity_gas,
+                                                  qsOne, qsTwo, ws);
+#else
+      pystencils::thermal_LB_step thermal_LB_step(temperature_PDFs_ID, phase_field_ID, temperature_field_ID, velocity_field_ID,
+                                                  heat_conductivity_liquid, heat_conductivity_gas);
+#endif
+      pystencils::initialize_rk2 initRK2(rk_field_one_ID, temperature_field_ID);
+      pystencils::rk2_first_stage RK2FirstStage(rk_field_one_ID, phase_field_ID, temperature_field_ID, velocity_field_ID, heat_conductivity_liquid,
+                                                heat_conductivity_gas, density_liquid, density_gas);
+      pystencils::rk2_second_stage RK2SecondStage(rk_field_one_ID, phase_field_ID, temperature_field_ID, velocity_field_ID,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas);
+
+      pystencils::initialize_rk4 initRK4(rk_field_one_ID, rk_field_two_ID, rk_field_three_ID, temperature_field_ID);
+      pystencils::rk4_first_stage RK4FirstStage(rk_field_one_ID, phase_field_ID, temperature_field_ID, velocity_field_ID, heat_conductivity_liquid,
+                                                heat_conductivity_gas, density_liquid, density_gas);
+      pystencils::rk4_second_stage RK4SecondStage(rk_field_one_ID, rk_field_two_ID, phase_field_ID, temperature_field_ID, velocity_field_ID,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas);
+      pystencils::rk4_third_stage RK4ThirdStage(rk_field_two_ID, rk_field_three_ID, phase_field_ID, temperature_field_ID, velocity_field_ID,
+                                                heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas);
+      pystencils::rk4_fourth_stage RK4FourthStage(rk_field_one_ID, rk_field_two_ID, rk_field_three_ID, phase_field_ID, temperature_field_ID, velocity_field_ID,
+                                                  heat_conductivity_liquid, heat_conductivity_gas, density_liquid, density_gas);
+
+      auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
+      {
+         auto phaseField = b->getData< ScalarField_T >(phase_field_ID);
+         auto phaseFieldTMP = b->getData< ScalarField_T >(phase_field_tmp_ID);
+         phaseField->swapDataPointers(phaseFieldTMP);
+      });
+#endif
+
+      for (auto& block : *blocks)
+      {
+         thermal_LB_step.configure(blocks, &block);
+      }
+
+      ////////////////////////
+      // ADD COMMUNICATION //
+      //////////////////////
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      const bool gpuEnabledMpi = parameters.getParameter< bool >("gpuEnabledMpi", false);
+      if(gpuEnabledMpi)
+         WALBERLA_LOG_INFO_ON_ROOT("GPU-Direct communication is used for MPI")
+      else
+         WALBERLA_LOG_INFO_ON_ROOT("No GPU-Direct communication is used for MPI")
+
+      auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(allen_cahn_PDFs_ID);
+      auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(hydrodynamic_PDFs_ID);
+      auto generatedPackInfo_thermal_distributions = make_shared< lbm::PackInfo_thermal_field_distributions>(temperature_PDFs_ID);
+      auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
+      auto generatedPackInfo_temperature_field = make_shared< pystencils::PackInfo_temperature_field >(temperature_field_gpu);
+
+      auto UniformGPUSchemeVelocityBasedDistributions = make_shared< gpu::communication::UniformGPUScheme< CommStencil_hydro_T > >(blocks, gpuEnabledMpi, false);
+      auto UniformGPUSchemePhaseFieldDistributions = make_shared< gpu::communication::UniformGPUScheme< CommStencil_phase_T > >(blocks, gpuEnabledMpi, false);
+      auto UniformGPUSchemeThermalDistributions = make_shared< gpu::communication::UniformGPUScheme< CommStencil_phase_T > >(blocks, gpuEnabledMpi, false);
+      auto UniformGPUSchemePhaseField = make_shared< gpu::communication::UniformGPUScheme< Full_Stencil_T > >(blocks, gpuEnabledMpi, false, 65432);
+      auto UniformGPUSchemeTemperatureField = make_shared< gpu::communication::UniformGPUScheme< Full_Stencil_T > >(blocks, gpuEnabledMpi, false, 65432);
+
+      UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+      UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_phase_field);
+      UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+      UniformGPUSchemeThermalDistributions->addPackInfo(generatedPackInfo_thermal_distributions);
+      UniformGPUSchemeThermalDistributions->addPackInfo(generatedPackInfo_temperature_field);
+      UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
+      UniformGPUSchemeTemperatureField->addPackInfo(generatedPackInfo_temperature_field);
+
+      auto Comm_velocity_based_distributions = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->communicate(); });
+      auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
+      auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
+
+      auto Comm_phase_field_distributions = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->communicate(); });
+      auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
+      auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
+
+      auto Comm_thermal_distributions = std::function< void() >([&]() { UniformGPUSchemeThermalDistributions->communicate(); });
+      auto Comm_thermal_distributions_start = std::function< void() >([&]() { UniformGPUSchemeThermalDistributions->startCommunication(); });
+      auto Comm_thermal_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeThermalDistributions->wait(); });
+
+      auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
+      auto Comm_phase_field_start = std::function< void() >([&]() { UniformGPUSchemePhaseField->startCommunication(); });
+      auto Comm_phase_field_wait = std::function< void() >([&]() { UniformGPUSchemePhaseField->wait(); });
+
+      auto Comm_temperature_field = std::function< void() >([&]() { UniformGPUSchemeTemperatureField->communicate(); });
+      auto Comm_temperature_field_start = std::function< void() >([&]() { UniformGPUSchemeTemperatureField->startCommunication(); });
+      auto Comm_temperature_field_wait = std::function< void() >([&]() { UniformGPUSchemeTemperatureField->wait(); });
+
+#else
+      auto UniformBufferedSchemeVelocityDistributions = make_shared<blockforest::communication::UniformBufferedScheme< CommStencil_hydro_T >> (blocks, 1111);
+      auto generatedPackInfo_velocity_based_distributions =
+         make_shared< lbm::PackInfo_velocity_based_distributions >(hydrodynamic_PDFs_ID);
+      UniformBufferedSchemeVelocityDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+      auto Comm_velocity_based_distributions = std::function< void() >([&]() { UniformBufferedSchemeVelocityDistributions->communicate(); });
+      auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformBufferedSchemeVelocityDistributions->startCommunication(); });
+      auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformBufferedSchemeVelocityDistributions->wait(); });
+
+      auto UniformBufferedSchemePhaseField = make_shared<blockforest::communication::UniformBufferedScheme< Full_Stencil_T >> (blocks, 2222);
+      auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_ID);
+      UniformBufferedSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
+      auto Comm_phase_field = std::function< void() >([&]() { UniformBufferedSchemePhaseField->communicate(); });
+      auto Comm_phase_field_start = std::function< void() >([&]() { UniformBufferedSchemePhaseField->startCommunication(); });
+      auto Comm_phase_field_wait = std::function< void() >([&]() { UniformBufferedSchemePhaseField->wait(); });
+
+      auto UniformBufferedSchemeTemperatureField = make_shared<blockforest::communication::UniformBufferedScheme< Full_Stencil_T >>(blocks, 3333);
+      auto generatedPackInfo_temperature_field =
+         make_shared< pystencils::PackInfo_temperature_field >(temperature_field_ID);
+      UniformBufferedSchemeTemperatureField->addPackInfo(generatedPackInfo_temperature_field);
+      auto Comm_temperature_field = std::function< void() >([&]() { UniformBufferedSchemeTemperatureField->communicate(); });
+      auto Comm_temperature_field_start = std::function< void() >([&]() { UniformBufferedSchemeTemperatureField->startCommunication(); });
+      auto Comm_temperature_field_wait = std::function< void() >([&]() { UniformBufferedSchemeTemperatureField->wait(); });
+
+      auto UniformBufferedSchemePhaseFieldDistributions = make_shared<blockforest::communication::UniformBufferedScheme< CommStencil_phase_T >>(blocks, 4444);
+      auto generatedPackInfo_phase_field_distributions =
+         make_shared< lbm::PackInfo_phase_field_distributions >(allen_cahn_PDFs_ID);
+      UniformBufferedSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+      auto Comm_phase_field_distributions = std::function< void() >([&]() { UniformBufferedSchemePhaseFieldDistributions->communicate(); });
+      auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformBufferedSchemePhaseFieldDistributions->startCommunication(); });
+      auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformBufferedSchemePhaseFieldDistributions->wait(); });
+
+      auto UniformBufferedSchemeThermalDistributions = make_shared<blockforest::communication::UniformBufferedScheme< CommStencil_thermal_T >>(blocks, 5555);
+      auto generatedPackInfo_thermal_distributions =
+         make_shared< lbm::PackInfo_thermal_field_distributions >(temperature_PDFs_ID);
+      UniformBufferedSchemeThermalDistributions->addPackInfo(generatedPackInfo_thermal_distributions);
+      auto Comm_thermal_distributions = std::function< void() >([&]() { UniformBufferedSchemeThermalDistributions->communicate(); });
+      auto Comm_thermal_distributions_start = std::function< void() >([&]() { UniformBufferedSchemeThermalDistributions->startCommunication(); });
+      auto Comm_thermal_distributions_wait = std::function< void() >([&]() { UniformBufferedSchemeThermalDistributions->wait(); });
+#endif
+
+      ////////////////////////
+      // BOUNDARY HANDLING //
+      //////////////////////
+
+      const FlagUID fluidFlagUID("Fluid");
+      const FlagUID wallFlagUID("NoSlip");
+      const FlagUID ubbFlagUID("UBB");
+      const FlagUID diffusionStaticFlagUID("DiffusionDirichletStatic");
+      const FlagUID diffusionDynamicFlagUID("DiffusionDirichletDynamic");
+      const FlagUID neumannFlagUID("NeumannByCopy");
+
+      auto boundariesConfigPhase = config->getBlock("BoundariesAllenCahn");
+      if (boundariesConfigPhase)
+      {
+         geometry::initBoundaryHandling< FlagField_T >(*blocks, flag_field_allen_cahn_LB_ID, boundariesConfigPhase);
+         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flag_field_allen_cahn_LB_ID, fluidFlagUID);
+      }
+
+      auto boundariesConfigHydro = config->getBlock("BoundariesHydro");
+      if (boundariesConfigHydro)
+      {
+         geometry::initBoundaryHandling< FlagField_T >(*blocks, flag_field_hydrodynamic_LB_ID, boundariesConfigHydro);
+         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flag_field_hydrodynamic_LB_ID, fluidFlagUID);
+      }
+
+      auto boundariesConfigThermal = config->getBlock("BoundariesThermal");
+      if (boundariesConfigPhase)
+      {
+         geometry::initBoundaryHandling< FlagField_T >(*blocks, flag_field_thermal_LB_ID, boundariesConfigThermal);
+         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flag_field_thermal_LB_ID, fluidFlagUID);
+      }
+
+      lbm::phase_field_LB_NoSlip phase_field_LB_NoSlip(blocks, allen_cahn_PDFs_ID);
+      phase_field_LB_NoSlip.fillFromFlagField< FlagField_T >(blocks, flag_field_allen_cahn_LB_ID, wallFlagUID, fluidFlagUID);
+
+      lbm::hydro_LB_NoSlip hydro_LB_NoSlip(blocks, hydrodynamic_PDFs_ID);
+      hydro_LB_NoSlip.fillFromFlagField< FlagField_T >(blocks, flag_field_hydrodynamic_LB_ID, wallFlagUID, fluidFlagUID);
+      lbm::hydro_LB_UBB hydro_LB_UBB(blocks, hydrodynamic_PDFs_ID, velocity_wall);
+      hydro_LB_UBB.fillFromFlagField< FlagField_T >(blocks, flag_field_hydrodynamic_LB_ID, ubbFlagUID, fluidFlagUID);
+
+
+      std::function< real_t(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) >
+         DiffusionInitialisation = std::bind(DiffusionCallback, _1, _2, _3, t_h, t_0);
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      lbm::thermal_LB_DiffusionDirichlet_static thermal_LB_DiffusionStatic(blocks, temperature_PDFs_ID, vel_field_gpu, temperature_ref);
+      thermal_LB_DiffusionStatic.fillFromFlagField< FlagField_T >(blocks, flag_field_thermal_LB_ID, diffusionStaticFlagUID, fluidFlagUID);
+      lbm::thermal_LB_DiffusionDirichlet_dynamic thermal_LB_DiffusionDynamic(blocks, temperature_PDFs_ID, vel_field_gpu, DiffusionInitialisation);
+      thermal_LB_DiffusionDynamic.fillFromFlagField< FlagField_T >(blocks, flag_field_thermal_LB_ID, diffusionDynamicFlagUID, fluidFlagUID);
+      lbm::thermal_LB_NeumannByCopy thermal_LB_Neumann(blocks, temperature_PDFs_ID);
+      thermal_LB_Neumann.fillFromFlagField< FlagField_T >(blocks, flag_field_thermal_LB_ID, neumannFlagUID, fluidFlagUID);
+
+      pystencils::ContactAngle contact_angle(blocks, phase_field_gpu, interface_thickness, angle);
+      contact_angle.fillFromFlagField< FlagField_T >(blocks, flag_field_allen_cahn_LB_ID, wallFlagUID, fluidFlagUID);
+#else
+      lbm::thermal_LB_DiffusionDirichlet_static thermal_LB_DiffusionStatic(blocks, temperature_PDFs_ID,
+                                                                           velocity_field_ID, temperature_ref);
+      thermal_LB_DiffusionStatic.fillFromFlagField< FlagField_T >(blocks, flag_field_thermal_LB_ID, diffusionStaticFlagUID, fluidFlagUID);
+      lbm::thermal_LB_DiffusionDirichlet_dynamic thermal_LB_DiffusionDynamic(blocks, temperature_PDFs_ID, velocity_field_ID, DiffusionInitialisation);
+      thermal_LB_DiffusionDynamic.fillFromFlagField< FlagField_T >(blocks, flag_field_thermal_LB_ID, diffusionDynamicFlagUID, fluidFlagUID);
+      lbm::thermal_LB_NeumannByCopy thermal_LB_Neumann(blocks, temperature_PDFs_ID);
+      thermal_LB_Neumann.fillFromFlagField< FlagField_T >(blocks, flag_field_thermal_LB_ID, neumannFlagUID, fluidFlagUID);
+
+      pystencils::ContactAngle contact_angle(blocks, phase_field_ID, interface_thickness, angle);
+      contact_angle.fillFromFlagField< FlagField_T >(blocks, flag_field_allen_cahn_LB_ID, wallFlagUID, fluidFlagUID);
+#endif
+
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      ////////////////////////////
+      // TIMELOOP THERMAL ONLY //
+      //////////////////////////
+
+      SweepTimeloop thermalTimeloop(blocks->getBlockStorage(), thermal_timesteps);
+
+      if(heat_solver_RK_or_LBM)
+      {
+         if (orderRKSolver == 2)
+         {
+            thermalTimeloop.add() << Sweep(initRK2.getSweep(defaultStream), "initialise RK");
+            thermalTimeloop.add() << Sweep(RK2FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+            thermalTimeloop.add() << Sweep(RK2SecondStage.getSweep(defaultStream), "Runge Kutta step two")
+                                  << AfterFunction(Comm_temperature_field, "Temperature field Communication");
+         }
+         else
+         {
+            thermalTimeloop.add() << Sweep(initRK4.getSweep(defaultStream), "initialise RK");
+            thermalTimeloop.add() << Sweep(RK4FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+            thermalTimeloop.add() << Sweep(RK4SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+            thermalTimeloop.add() << Sweep(RK4ThirdStage.getSweep(defaultStream), "Runge Kutta step three");
+            thermalTimeloop.add() << Sweep(RK4FourthStage.getSweep(defaultStream), "Runge Kutta step four")
+                                  << AfterFunction(Comm_temperature_field, "Temperature field Communication");
+         }
+      }
+      else
+      {
+         thermalTimeloop.add() << BeforeFunction(Comm_thermal_distributions, "Thermal PDFs Communication")
+                               << Sweep(thermal_LB_Neumann.getSweep(defaultStream), "Neumann Thermal");
+         thermalTimeloop.add() << Sweep(thermal_LB_DiffusionStatic.getSweep(defaultStream), "Static Diffusion Thermal");
+         thermalTimeloop.add() << Sweep(thermal_LB_DiffusionDynamic.getSweep(defaultStream), "Dynamic Diffusion Thermal");
+         thermalTimeloop.add() << Sweep(thermal_LB_step.getSweep(defaultStream), "Thermal LB Step");
+      }
+
+      ////////////////
+      // TIME LOOP //
+      //////////////
+
+      SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
+      if(heat_solver_RK_or_LBM)
+      {
+         if (timestep_strategy == "NormalTimestep")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Normal time step for production runs and full application benchmarks with " << std::to_string(orderRKSolver) << ". order RK scheme as thermal solver")
+            timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+                           << BeforeFunction(Comm_temperature_field_start, "Start Communication temperature field")
+                           << Sweep(phase_field_LB_NoSlip.getSweep(defaultStream), "NoSlip Phase");
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step")
+                           << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication")
+                           << AfterFunction(Comm_temperature_field_wait, "Wait Communication temperature field");
+
+            timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+                           << Sweep(hydro_LB_UBB.getSweep(defaultStream), "UBB Hydro");
+            timeloop.add() << Sweep(hydro_LB_NoSlip.getSweep(defaultStream), "NoSlip Hydro");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+            timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField");
+            timeloop.add() << Sweep(contact_angle.getSweep(defaultStream), "Contact Angle")
+                           << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
+
+            if(orderRKSolver == 2)
+            {
+               timeloop.add() << BeforeFunction(Comm_phase_field, "Communication Phase Field")
+                              << Sweep(RK2FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK2SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+            }
+            else
+            {
+               timeloop.add() << BeforeFunction(Comm_phase_field, "Communication Phase Field")
+                              << Sweep(RK4FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK4SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+               timeloop.add() << Sweep(RK4ThirdStage.getSweep(defaultStream), "Runge Kutta step three");
+               timeloop.add() << Sweep(RK4FourthStage.getSweep(defaultStream), "Runge Kutta step four");
+
+            }
+         }
+         else if (timestep_strategy == "PhaseOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the Allen Cahn LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step");
+         }
+         else if (timestep_strategy == "HydroOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the hydrodynamic LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+         }
+         else if (timestep_strategy == "ThermalOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the RK kernels. This makes only sense for benchmarking")
+            if(orderRKSolver == 2)
+            {
+               timeloop.add() << Sweep(RK2FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK2SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+            }
+            else
+            {
+               timeloop.add() << Sweep(RK4FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK4SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+               timeloop.add() << Sweep(RK4ThirdStage.getSweep(defaultStream), "Runge Kutta step three");
+               timeloop.add() << Sweep(RK4FourthStage.getSweep(defaultStream), "Runge Kutta step four");
+
+            }
+         }
+         else if (timestep_strategy == "KernelOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the compute kernels without boundary conditions and communication. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+            if(orderRKSolver == 2)
+            {
+               timeloop.add() << Sweep(RK2FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK2SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+            }
+            else
+            {
+               timeloop.add() << Sweep(RK4FirstStage.getSweep(defaultStream), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK4SecondStage.getSweep(defaultStream), "Runge Kutta step two");
+               timeloop.add() << Sweep(RK4ThirdStage.getSweep(defaultStream), "Runge Kutta step three");
+               timeloop.add() << Sweep(RK4FourthStage.getSweep(defaultStream), "Runge Kutta step four");
+
+            }
+         }
+         else
+         {
+            WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'")
+         }
+      }
+      else
+      {
+         if (timestep_strategy == "NormalTimestep")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Normal time step for production runs and full application benchmarks with LBM thermal solver")
+            timeloop.add() << BeforeFunction(Comm_thermal_distributions_start, "Start Thermal PDFs Communication")
+                           << Sweep(phase_field_LB_NoSlip.getSweep(defaultStream), "NoSlip Phase");
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step")
+                           << AfterFunction(Comm_thermal_distributions_wait, "Wait Thermal PDFs Communication");
+
+            timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+                           << Sweep(hydro_LB_UBB.getSweep(defaultStream), "UBB Hydro");
+            timeloop.add() << Sweep(hydro_LB_NoSlip.getSweep(defaultStream), "NoSlip Hydro");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+            timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField");
+            timeloop.add() << Sweep(contact_angle.getSweep(defaultStream), "Contact Angle")
+                           << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
+
+            timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+                           << Sweep(thermal_LB_Neumann.getSweep(defaultStream), "Neumann Thermal");
+            timeloop.add() << Sweep(thermal_LB_DiffusionStatic.getSweep(defaultStream), "Static Diffusion Thermal");
+            timeloop.add() << Sweep(thermal_LB_DiffusionDynamic.getSweep(defaultStream), "Dynamic Diffusion Thermal");
+            timeloop.add() << Sweep(thermal_LB_step.getSweep(defaultStream), "Thermal LB Step")
+                           << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
+         }
+         else if (timestep_strategy == "PhaseOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the Allen Cahn LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step");
+         }
+         else if (timestep_strategy == "HydroOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the hydrodynamic LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+         }
+         else if (timestep_strategy == "ThermalOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the thermal LB step kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(thermal_LB_step.getSweep(defaultStream), "Thermal LB Step");
+
+         }
+         else if (timestep_strategy == "KernelOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the compute kernels without boundary conditions and communication. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+            timeloop.add() << Sweep(thermal_LB_step.getSweep(defaultStream), "Thermal LB Step");
+         }
+         else
+         {
+            WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'")
+         }
+
+      }
+      gpu::fieldCpy< GPUField, ScalarField_T >(blocks, phase_field_gpu, phase_field_ID);
+      gpu::fieldCpy< GPUField, VectorField_T >(blocks, vel_field_gpu, velocity_field_ID);
+      gpu::fieldCpy< GPUField, ScalarField_T >(blocks, temperature_field_gpu, temperature_field_ID);
+#else
+      ////////////////////////////
+      // TIMELOOP THERMAL ONLY //
+      //////////////////////////
+
+      SweepTimeloop thermalTimeloop(blocks->getBlockStorage(), thermal_timesteps);
+      if(heat_solver_RK_or_LBM)
+      {
+         if (orderRKSolver == 2)
+         {
+            thermalTimeloop.add() << Sweep(initRK2.getSweep(), "initialise RK");
+            thermalTimeloop.add() << Sweep(RK2FirstStage.getSweep(), "Runge Kutta step one");
+            thermalTimeloop.add() << Sweep(RK2SecondStage.getSweep(), "Runge Kutta step two")
+                                  << AfterFunction(Comm_temperature_field, "Temperature field Communication");
+         }
+         else
+         {
+            thermalTimeloop.add() << Sweep(initRK4.getSweep(), "initialise RK");
+            thermalTimeloop.add() << Sweep(RK4FirstStage.getSweep(), "Runge Kutta step one");
+            thermalTimeloop.add() << Sweep(RK4SecondStage.getSweep(), "Runge Kutta step two");
+            thermalTimeloop.add() << Sweep(RK4ThirdStage.getSweep(), "Runge Kutta step three");
+            thermalTimeloop.add() << Sweep(RK4FourthStage.getSweep(), "Runge Kutta step four")
+                                  << AfterFunction(Comm_temperature_field, "Temperature field Communication");
+         }
+      }
+      else
+      {
+         thermalTimeloop.add() << BeforeFunction(Comm_thermal_distributions, "Thermal PDFs Communication")
+                               << Sweep(thermal_LB_Neumann.getSweep(), "Neumann Thermal");
+         thermalTimeloop.add() << Sweep(thermal_LB_DiffusionStatic.getSweep(), "Static Diffusion Thermal");
+         thermalTimeloop.add() << Sweep(thermal_LB_DiffusionDynamic.getSweep(), "Dynamic Diffusion Thermal");
+         thermalTimeloop.add() << Sweep(thermal_LB_step.getSweep(), "Thermal LB Step");
+      }
+
+      ////////////////
+      // TIME LOOP //
+      //////////////
+
+      SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
+      if(heat_solver_RK_or_LBM)
+      {
+         if (timestep_strategy == "NormalTimestep")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Normal time step for production runs and full application benchmarks")
+            timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+                           << BeforeFunction(Comm_temperature_field_start, "Start Communication temperature field")
+                           << Sweep(phase_field_LB_NoSlip.getSweep(), "NoSlip Phase");
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step")
+                           << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication")
+                           << AfterFunction(Comm_temperature_field_wait, "Wait Communication temperature field");
+
+            timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+                           << Sweep(hydro_LB_UBB.getSweep(), "UBB Hydro");
+            timeloop.add() << Sweep(hydro_LB_NoSlip.getSweep(), "NoSlip Hydro");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+            timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField");
+            timeloop.add() << Sweep(contact_angle.getSweep(), "Contact Angle")
+                           << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication")
+                           << AfterFunction(Comm_phase_field, "Communication Phase Field");
+
+            if(orderRKSolver == 2)
+            {
+               timeloop.add() << BeforeFunction(Comm_phase_field, "Communication Phase Field")
+                              << Sweep(RK2FirstStage.getSweep(), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK2SecondStage.getSweep(), "Runge Kutta step two");
+            }
+            else
+            {
+               timeloop.add() << BeforeFunction(Comm_phase_field, "Communication Phase Field")
+                              << Sweep(RK4FirstStage.getSweep(), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK4SecondStage.getSweep(), "Runge Kutta step two");
+               timeloop.add() << Sweep(RK4ThirdStage.getSweep(), "Runge Kutta step three");
+               timeloop.add() << Sweep(RK4FourthStage.getSweep(), "Runge Kutta step four");
+
+            }
+         }
+         else if (timestep_strategy == "PhaseOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the Allen Cahn LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step");
+         }
+         else if (timestep_strategy == "HydroOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the hydrodynamic LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+         }
+         else if (timestep_strategy == "ThermalOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the RK kernels. This makes only sense for benchmarking")
+            if(orderRKSolver == 2)
+            {
+               timeloop.add() << Sweep(RK2FirstStage.getSweep(), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK2SecondStage.getSweep(), "Runge Kutta step two");
+            }
+            else
+            {
+               timeloop.add() << Sweep(RK4FirstStage.getSweep(), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK4SecondStage.getSweep(), "Runge Kutta step two");
+               timeloop.add() << Sweep(RK4ThirdStage.getSweep(), "Runge Kutta step three");
+               timeloop.add() << Sweep(RK4FourthStage.getSweep(), "Runge Kutta step four");
+
+            }
+         }
+         else if (timestep_strategy == "KernelOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the compute kernels without boundary conditions and communication. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+            if(orderRKSolver == 2)
+            {
+               timeloop.add() << Sweep(RK2FirstStage.getSweep(), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK2SecondStage.getSweep(), "Runge Kutta step two");
+            }
+            else
+            {
+               timeloop.add() << Sweep(RK4FirstStage.getSweep(), "Runge Kutta step one");
+               timeloop.add() << Sweep(RK4SecondStage.getSweep(), "Runge Kutta step two");
+               timeloop.add() << Sweep(RK4ThirdStage.getSweep(), "Runge Kutta step three");
+               timeloop.add() << Sweep(RK4FourthStage.getSweep(), "Runge Kutta step four");
+
+            }
+         }
+         else
+         {
+            WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'")
+         }
+      }
+      else
+      {
+         if (timestep_strategy == "NormalTimestep")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Normal time step for production runs and full application benchmarks with LBM thermal solver")
+            timeloop.add() << BeforeFunction(Comm_thermal_distributions_start, "Start Thermal PDFs Communication")
+                           << BeforeFunction(Comm_temperature_field_start, "Start Communication temperature field")
+                           << Sweep(phase_field_LB_NoSlip.getSweep(), "NoSlip Phase");
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step")
+                           << AfterFunction(Comm_thermal_distributions_wait, "Wait Thermal PDFs Communication")
+                           << AfterFunction(Comm_temperature_field_wait, "Wait Communication temperature field");
+
+            timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+                           << Sweep(hydro_LB_UBB.getSweep(), "UBB Hydro");
+            timeloop.add() << Sweep(hydro_LB_NoSlip.getSweep(), "NoSlip Hydro");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+            timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField");
+            timeloop.add() << Sweep(contact_angle.getSweep(), "Contact Angle")
+                           << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
+
+            timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+                           << BeforeFunction(Comm_phase_field_start, "Start Communication Phase Field")
+                           << Sweep(thermal_LB_Neumann.getSweep(), "Neumann Thermal");
+            timeloop.add() << Sweep(thermal_LB_DiffusionStatic.getSweep(), "Static Diffusion Thermal");
+            timeloop.add() << Sweep(thermal_LB_DiffusionDynamic.getSweep(), "Dynamic Diffusion Thermal");
+            timeloop.add() << Sweep(thermal_LB_step.getSweep(), "Thermal LB Step")
+                           << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication")
+                           << AfterFunction(Comm_phase_field_wait, "Wait Communication Phase Field");
+         }
+         else if (timestep_strategy == "PhaseOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the Allen Cahn LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step");
+         }
+         else if (timestep_strategy == "HydroOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the hydrodynamic LB kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+         }
+         else if (timestep_strategy == "ThermalOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the thermal LB step kernel. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(thermal_LB_step.getSweep(), "Thermal LB Step");
+
+         }
+         else if (timestep_strategy == "KernelOnly")
+         {
+            WALBERLA_LOG_INFO_ON_ROOT("Running only the compute kernels without boundary conditions and communication. This makes only sense for benchmarking")
+            timeloop.add() << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step");
+            timeloop.add() << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+            timeloop.add() << Sweep(thermal_LB_step.getSweep(), "Thermal LB Step");
+         }
+         else
+         {
+            WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'")
+         }
+
+      }
+#endif
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs")
+      for (auto& block : *blocks)
+      {
+         init_h(&block);
+         init_g(&block);
+         if(!heat_solver_RK_or_LBM)
+            init_f(&block);
+      }
+      Comm_phase_field_distributions();
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs done")
+
+      // remaining time logger, higher frequency than 0.5 seconds is not allowed
+      if (remaining_time_logger_frequency > 0.5)
+      {
+         timeloop.addFuncAfterTimeStep(
+            timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remaining_time_logger_frequency),
+            "remaining time logger");
+
+         thermalTimeloop.addFuncAfterTimeStep(
+            timing::RemainingTimeLogger(thermalTimeloop.getNrOfTimeSteps(), remaining_time_logger_frequency),
+            "remaining time logger");
+      }
+
+      const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      const std::string vtkPath = parameters.getParameter<std::string>("vtkPath", "thermocapillary_vtk");
+      if (vtkWriteFrequency > 0)
+      {
+         auto vtkOutput   = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, vtkPath,
+                                                           "simulation_step", false, true, true, false, 0);
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+         vtkOutput->addBeforeFunction([&]() {
+            gpu::fieldCpy< ScalarField_T , GPUField >(blocks, phase_field_ID, phase_field_gpu);
+            gpu::fieldCpy< VectorField_T , GPUField >(blocks, velocity_field_ID, vel_field_gpu);
+            gpu::fieldCpy< ScalarField_T , GPUField >(blocks, temperature_field_ID, temperature_field_gpu);
+         });
+#endif
+
+         auto phaseWriter = make_shared< field::VTKWriter< ScalarField_T > >(phase_field_ID, "PhaseField");
+         vtkOutput->addCellDataWriter(phaseWriter);
+
+         auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velocity_field_ID, "Velocity");
+         vtkOutput->addCellDataWriter(velWriter);
+
+         auto tempWriter = make_shared< field::VTKWriter< ScalarField_T > >(temperature_field_ID, "Temperature");
+         vtkOutput->addCellDataWriter(tempWriter);
+
+         timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }
+
+      uint_t meshWriteFrequency = parameters.getParameter< uint_t >("meshWriteFrequency", 0);
+      const int targetRank          = 0;
+      int counter                   = 0;
+      if (meshWriteFrequency > 0)
+      {
+         timeloop.addFuncAfterTimeStep(
+            [&]() {
+               if (timeloop.getCurrentTimeStep() % uint_t(meshWriteFrequency) == 0)
+               {
+                  auto mesh = postprocessing::realFieldToSurfaceMesh< ScalarField_T >(blocks, phase_field_ID, 0.5, 0, true,
+                                                                                      targetRank, MPI_COMM_WORLD);
+                  WALBERLA_EXCLUSIVE_WORLD_SECTION(targetRank)
+                  {
+                     const std::string path = "./Meshes/Droplet_contact_angle_" + std::to_string(int32_c(angle));
+                     std::ostringstream out;
+                     out << std::internal << std::setfill('0') << std::setw(6) << counter;
+                     geometry::writeMesh(path + "_" + out.str() + ".obj", *mesh);
+                     counter++;
+                  }
+               }
+            },
+            "Mesh writer");
+      }
+
+
+      const uint_t dbWriteFrequency = parameters.getParameter< uint_t >("dbWriteFrequency", 0);
+      if (dbWriteFrequency > 0)
+      {
+         timeloop.addFuncAfterTimeStep(
+            [&]() {
+               if (timeloop.getCurrentTimeStep() % dbWriteFrequency == 0)
+               {
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+                  gpu::fieldCpy< ScalarField_T , GPUField >(blocks, phase_field_ID, phase_field_gpu);
+                  gpu::fieldCpy< VectorField_T , GPUField >(blocks, velocity_field_ID, vel_field_gpu);
+                  gpu::fieldCpy< ScalarField_T , GPUField >(blocks, temperature_field_ID, temperature_field_gpu);
+                  WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+#endif
+                  python_coupling::PythonCallback callback("at_end_of_time_step");
+                  if (callback.isCallable())
+                  {
+                     callback.data().exposeValue("blocks", blocks);
+                     callback.data().exposeValue("timeStep", timeloop.getCurrentTimeStep());
+                     callback.data().exposeValue("stencil_phase", StencilNamePhase);
+                     callback.data().exposeValue("stencil_hydro", StencilNameHydro);
+                     callback.data().exposeValue("stencil_thermal", StencilNameThermal);
+                     callback.data().exposeValue("collision_space_phase", CollisionSpacePhase);
+                     callback.data().exposeValue("collision_space_hydro", CollisionSpaceHydro);
+                     callback.data().exposeValue("collision_space_thermal", CollisionSpaceThermal);
+                     callback.data().exposeValue("field_type", fieldType);
+                     callback.data().exposeValue("field_type_pdfs", fieldTypePDFs);
+                     callback();
+                  }
+               }
+            },
+            "Python callback");
+      }
+
+      const lbm::PerformanceEvaluation< FlagField_T > performance_evaluation( blocks, flag_field_hydrodynamic_LB_ID, fluidFlagUID );
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+#endif
+      WALBERLA_LOG_INFO_ON_ROOT("Running "
+                                << thermal_timesteps
+                                << " timesteps with only the thermal LB step to initialise the temperature field")
+      thermalTimeloop.run();
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+#endif
+      WALBERLA_MPI_WORLD_BARRIER()
+      WALBERLA_LOG_INFO_ON_ROOT("Initialised temperature field")
+
+      WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+
+      WcTimingPool timeloopTiming;
+      WcTimer simTimer;
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+#endif
+      simTimer.start();
+      timeloop.run(timeloopTiming);
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+#endif
+      simTimer.end();
+
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+      WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+      WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+#endif
+      WALBERLA_MPI_WORLD_BARRIER()
+      WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+
+      real_t time = real_c(simTimer.max());
+      WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+      performance_evaluation.logResultOnRoot( timesteps, time );
+
+      const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+      WALBERLA_LOG_RESULT_ON_ROOT( "Time loop timing:\n" << *reducedTimeloopTiming )
+
+      if(benchmark)
+      {
+         WALBERLA_ROOT_SECTION()
+         {
+            python_coupling::PythonCallback callback("write_benchmark_results");
+            if (callback.isCallable())
+            {
+               callback.data().exposeValue("stencil_phase", StencilNamePhase);
+               callback.data().exposeValue("stencil_hydro", StencilNameHydro);
+               callback.data().exposeValue("stencil_thermal", StencilNameThermal);
+               callback.data().exposeValue("collision_space_phase", CollisionSpacePhase);
+               callback.data().exposeValue("collision_space_hydro", CollisionSpaceHydro);
+               callback.data().exposeValue("collision_space_thermal", CollisionSpaceThermal);
+               callback.data().exposeValue("field_type", fieldType);
+               callback.data().exposeValue("field_type_pdfs", fieldTypePDFs);
+               callback.data().exposeValue("number_of_processes", totalNumProcesses);
+               callback.data().exposeValue("threads", performance_evaluation.threads());
+               callback.data().exposeValue("MLUPS", double_c(performance_evaluation.mlups(timesteps, time)));
+               callback.data().exposeValue("MLUPS_process",
+                                           double_c(performance_evaluation.mlupsPerProcess(timesteps, time)));
+               callback.data().exposeValue(
+                  "timeStepsPerSecond",
+                  double_c(lbm::PerformanceEvaluation< FlagField_T >::timeStepsPerSecond(timesteps, time)));
+
+               callback();
+            }
+         }
+      }
+   }
+   return EXIT_SUCCESS;
+}
diff --git a/apps/showcases/Thermocapillary/thermocapillary_codegen.py b/apps/showcases/Thermocapillary/thermocapillary_codegen.py
new file mode 100644
index 0000000000000000000000000000000000000000..1930f0019a19c139c8432a34794553e389799a4f
--- /dev/null
+++ b/apps/showcases/Thermocapillary/thermocapillary_codegen.py
@@ -0,0 +1,409 @@
+import logging
+import numpy as np
+import sympy as sp
+
+from pystencils import Assignment, fields, Target
+from pystencils.astnodes import LoopOverCoordinate
+from pystencils.typing import TypedSymbol
+from pystencils.simp import (add_subexpressions_for_field_reads, sympy_cse, insert_aliases, insert_constants,
+                             insert_symbol_times_minus_one, insert_squares,
+                             insert_constant_multiples, insert_constant_additions)
+
+from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil, create_lb_update_rule
+from lbmpy.creationfunctions import create_lb_method
+from lbmpy.boundaries import DiffusionDirichlet, NeumannByCopy, NoSlip, UBB
+from lbmpy.macroscopic_value_kernels import pdf_initialization_assignments
+
+from lbmpy.phasefield_allen_cahn.contact_angle import ContactAngle
+from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
+    initializer_kernel_hydro_lb, interface_tracking_force, hydrodynamic_force, add_interface_tracking_force, \
+    add_hydrodynamic_force, hydrodynamic_force_assignments
+from lbmpy.phasefield_allen_cahn.numerical_solver import get_runge_kutta_update_assignments
+from lbmpy.phasefield_allen_cahn.parameter_calculation import ThermocapillaryParameters
+
+import pystencils_walberla
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field, generate_info_header
+from lbmpy_walberla.additional_data_handler import DiffusionDirichletAdditionalDataHandler
+from lbmpy_walberla import generate_boundary, generate_lb_pack_info
+
+# to disable type conversion warnings
+logging.disable(logging.WARNING)
+
+one = sp.Number(1)
+two = sp.Number(2)
+three = sp.Number(3)
+half = sp.Rational(1, 2)
+
+
+with CodeGeneration() as ctx:
+    field_type = "float64"
+    field_type_pdfs = "float64"
+
+    subs = {sp.Symbol(f"xi_{ctr}"): TypedSymbol(f"xi_{ctr}", dtype=field_type) for ctr in range(0, 100)}
+
+    contact_angle_in_degrees = sp.Symbol("alpha")
+    generate_with_heat_source = False
+
+    stencil_phase = LBStencil(Stencil.D3Q19)
+    stencil_hydro = LBStencil(Stencil.D3Q19)
+    stencil_thermal = LBStencil(Stencil.D3Q7)
+    assert (stencil_phase.D == stencil_hydro.D == stencil_thermal.D)
+    full_stencil = LBStencil(Stencil.D2Q9) if stencil_phase.D == 2 else LBStencil(Stencil.D3Q27)
+
+    # In the codegeneration skript we only need the symbolic parameters
+    parameters = ThermocapillaryParameters(0, 0, 0, 0, 0, 0, 0)
+
+    ########################
+    # FIELDS #
+    ########################
+
+    # velocity field
+    u = fields(f"vel_field({stencil_hydro.D}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+    # phase-field
+    C = fields(f"phase_field: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+    C_tmp = fields(f"phase_field_tmp: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+    # temperature field
+    temperature = fields(f"temperature_field: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+
+    # phase-field distribution functions
+    h = fields(f"lb_phase_field({stencil_phase.Q}): {field_type_pdfs}[{stencil_phase.D}D]", layout='fzyx')
+    h_tmp = fields(f"lb_phase_field_tmp({stencil_phase.Q}): {field_type_pdfs}[{stencil_phase.D}D]", layout='fzyx')
+    # hydrodynamic distribution functions
+    g = fields(f"lb_velocity_field({stencil_hydro.Q}): {field_type_pdfs}[{stencil_hydro.D}D]", layout='fzyx')
+    g_tmp = fields(f"lb_velocity_field_tmp({stencil_hydro.Q}): {field_type_pdfs}[{stencil_hydro.D}D]", layout='fzyx')
+    # thermal PDFs
+    f = fields(f"lb_thermal_field({stencil_thermal.Q}): {field_type_pdfs}[{stencil_hydro.D}D]", layout='fzyx')
+    f_tmp = fields(f"lb_thermal_field_tmp({stencil_thermal.Q}): {field_type_pdfs}[{stencil_hydro.D}D]", layout='fzyx')
+
+    RK1 = fields(f"Rk1: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+    RK2 = fields(f"Rk2: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+    RK3 = fields(f"Rk3: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
+
+    ########################################
+    # RELAXATION RATES AND EXTERNAL FORCES #
+    ########################################
+    k_l = parameters.symbolic_heat_conductivity_light
+    k_h = parameters.symbolic_heat_conductivity_heavy
+
+    conductivity = k_l + C.center * (k_h - k_l)
+    relaxation_rate_thermal = one/(half + (three * conductivity))
+
+    # relaxation rate for interface tracking LB step
+    relaxation_rate_allen_cahn = one / (half + (three * parameters.symbolic_mobility))
+    # calculate the relaxation rate for hydrodynamic LB step
+    rho_L = parameters.symbolic_density_light
+    rho_H = parameters.symbolic_density_heavy
+    density = rho_L + C.center * (rho_H - rho_L)
+    # force acting on all phases of the model
+    body_force = [0, 0, 0]
+
+    omega = parameters.omega(C)
+
+    ###############
+    # LBM METHODS #
+    ###############
+
+    lbm_config_phase = LBMConfig(stencil=stencil_phase, method=Method.CENTRAL_MOMENT, compressible=True,
+                                 zero_centered=False,
+                                 force=sp.symbols(f"F_:{stencil_phase.D}"), velocity_input=u,
+                                 weighted=True, relaxation_rates=[relaxation_rate_allen_cahn, ] * stencil_phase.Q,
+                                 output={'density': C_tmp})
+    method_phase = create_lb_method(lbm_config=lbm_config_phase)
+
+    lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.CENTRAL_MOMENT, compressible=False,
+                                 weighted=True, relaxation_rates=[omega, ] * stencil_hydro.Q,
+                                 force=sp.symbols(f"F_:{stencil_hydro.D}"),
+                                 output={'velocity': u})
+    method_hydro = create_lb_method(lbm_config=lbm_config_hydro)
+
+    config_thermal = LBMConfig(stencil=stencil_thermal, method=Method.CENTRAL_MOMENT, compressible=True,
+                               zero_centered=False,
+                               relaxation_rates=[relaxation_rate_thermal, ] * stencil_thermal.Q,
+                               velocity_input=u, output={'density': temperature})
+    method_thermal = create_lb_method(lbm_config=config_thermal)
+
+    # create the kernels for the initialization of the g and h field
+    h_updates = initializer_kernel_phase_field_lb(method_phase, C, u, h, parameters)
+    g_updates = initializer_kernel_hydro_lb(method_hydro, 1.0, u, g)
+    f_updates = pdf_initialization_assignments(method_thermal, temperature.center,
+                                               u.center_vector, f.center_vector)
+
+    force_h = interface_tracking_force(C, stencil_phase, parameters)
+    hydro_force = hydrodynamic_force(C, method_hydro, parameters, body_force, temperature_field=temperature)
+
+    heat_terms = list()
+    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(stencil_thermal.D)]
+    if generate_with_heat_source:
+        block_offset = [TypedSymbol(f"block_offset_{i}", counter.dtype, nonnegative=True)
+                        for i, counter in enumerate(counters)]
+        ctr = [offset + counter for offset, counter in zip(counters, block_offset)]
+
+        heat_source_midpoint_one = [TypedSymbol(f"hm_one_{i}", counter.dtype, nonnegative=True)
+                                    for i, counter in enumerate(counters)]
+
+        heat_source_midpoint_two = [TypedSymbol(f"hm_two_{i}", counter.dtype, nonnegative=True)
+                                    for i, counter in enumerate(counters)]
+
+        ws = sp.Symbol("w_s")
+        size_diffused_hotspot = sp.Symbol("d_s")
+
+        maximum_heat_flux_one = sp.Symbol("qs_one")
+        maximum_heat_flux_two = sp.Symbol("qs_two")
+
+        nominator_one = sum([(c - hm)**2 for c, hm in zip(ctr, heat_source_midpoint_one)])
+        nominator_two = sum([(c - hm)**2 for c, hm in zip(ctr, heat_source_midpoint_two)])
+
+        term_one = maximum_heat_flux_one * sp.exp(-2 * nominator_one / (ws**2))
+        term_two = maximum_heat_flux_two * sp.exp(-2 * nominator_two / (ws**2))
+
+        cond_one = sp.Piecewise((term_one, nominator_one <= size_diffused_hotspot**2), (0.0, True))
+        cond_two = sp.Piecewise((term_two, nominator_two <= size_diffused_hotspot**2), (0.0, True))
+        heat_source = cond_one + cond_two
+
+        weights = method_thermal.weights
+
+        for i in range(stencil_thermal.Q):
+            heat_terms.append(weights[i] * heat_source)
+
+    ####################
+    # LBM UPDATE RULES #
+    ####################
+
+    lbm_optimisation = LBMOptimisation(symbolic_field=h, symbolic_temporary_field=h_tmp)
+    allen_cahn_update_rule = create_lb_update_rule(lbm_config=lbm_config_phase,
+                                                   lbm_optimisation=lbm_optimisation)
+
+    allen_cahn_update_rule = add_interface_tracking_force(allen_cahn_update_rule, force_h)
+    allen_cahn_update_rule = add_subexpressions_for_field_reads(allen_cahn_update_rule)
+    allen_cahn_update_rule = allen_cahn_update_rule.new_with_substitutions(substitutions=subs)
+    # ---------------------------------------------------------------------------------------------------------
+    force_Assignments = hydrodynamic_force_assignments(u, C, method_hydro, parameters, body_force, sub_iterations=1,
+                                                       temperature_field=temperature)
+
+    lbm_optimisation = LBMOptimisation(symbolic_field=g, symbolic_temporary_field=g_tmp)
+    hydro_lb_update_rule = create_lb_update_rule(lbm_config=lbm_config_hydro,
+                                                 lbm_optimisation=lbm_optimisation)
+
+    hydro_lb_update_rule = add_hydrodynamic_force(hydro_lb_update_rule, force_Assignments, C, g, parameters)
+    hydro_lb_update_rule = sympy_cse(hydro_lb_update_rule)
+    hydro_lb_update_rule = insert_aliases(hydro_lb_update_rule)
+    hydro_lb_update_rule = insert_constants(hydro_lb_update_rule)
+    hydro_lb_update_rule = insert_symbol_times_minus_one(hydro_lb_update_rule)
+    hydro_lb_update_rule = insert_squares(hydro_lb_update_rule)
+    hydro_lb_update_rule = insert_constant_multiples(hydro_lb_update_rule)
+    hydro_lb_update_rule = insert_constant_additions(hydro_lb_update_rule)
+    hydro_lb_update_rule = add_subexpressions_for_field_reads(hydro_lb_update_rule)
+    hydro_lb_update_rule = hydro_lb_update_rule.new_with_substitutions(substitutions=subs)
+    # ---------------------------------------------------------------------------------------------------------
+    lbm_optimisation = LBMOptimisation(symbolic_field=f, symbolic_temporary_field=f_tmp)
+    thermal_lb_update_rule = create_lb_update_rule(lbm_config=config_thermal,
+                                                   lbm_optimisation=lbm_optimisation)
+    thermal_lb_update_rule = add_subexpressions_for_field_reads(thermal_lb_update_rule)
+    thermal_lb_update_rule = thermal_lb_update_rule.new_with_substitutions(substitutions=subs)
+
+    main_assignments = thermal_lb_update_rule.main_assignments
+
+    if generate_with_heat_source:
+        for i in range(stencil_thermal.Q):
+            main_assignments[i] = Assignment(main_assignments[i].lhs, main_assignments[i].rhs + heat_terms[i])
+
+    init_rk2 = [Assignment(RK1.center, temperature.center)]
+
+    init_rk4 = [Assignment(RK1.center, temperature.center),
+                Assignment(RK2.center, temperature.center),
+                Assignment(RK3.center, temperature.center)]
+
+    rk2_assignments = get_runge_kutta_update_assignments(full_stencil, C, temperature, u, [RK1, ],
+                                                         conduction_h=k_h,
+                                                         conduction_l=k_l,
+                                                         heat_capacity_h=1,
+                                                         heat_capacity_l=1,
+                                                         density=density,
+                                                         stabiliser=1)
+
+    rk4_assignments = get_runge_kutta_update_assignments(full_stencil, C, temperature, u, [RK1, RK2, RK3],
+                                                         conduction_h=k_h,
+                                                         conduction_l=k_l,
+                                                         heat_capacity_h=1,
+                                                         heat_capacity_l=1,
+                                                         density=density,
+                                                         stabiliser=1)
+
+    contact_angle = ContactAngle(contact_angle_in_degrees, parameters.symbolic_interface_thickness,
+                                 data_type=field_type)
+
+    ###################
+    # GENERATE SWEEPS #
+    ###################
+
+    if ctx.gpu:
+        target = Target.GPU
+        openmp = False
+        cpu_vec = None
+        vp = [('int32_t', 'cudaBlockSize0'),
+              ('int32_t', 'cudaBlockSize1'),
+              ('int32_t', 'cudaBlockSize2')]
+
+        sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
+                            TypedSymbol("cudaBlockSize1", np.int32),
+                            TypedSymbol("cudaBlockSize2", np.int32))
+        sweep_params = {'block_size': sweep_block_size}
+    else:
+        if ctx.optimize_for_localhost:
+            cpu_vec = {"instruction_set": None}
+        else:
+            cpu_vec = None
+
+        openmp = True if ctx.openmp else False
+
+        target = Target.CPU
+        sweep_params = {}
+        vp = []
+
+    comm_stencil_phase = LBStencil(Stencil.D3Q27) if stencil_phase == LBStencil(Stencil.D3Q15) else stencil_phase
+    comm_stencil_hydro = LBStencil(Stencil.D3Q27) if stencil_hydro == LBStencil(Stencil.D3Q15) else stencil_hydro
+    comm_stencil_thermal = LBStencil(Stencil.D3Q27) if stencil_thermal == LBStencil(Stencil.D3Q15) else stencil_thermal
+
+    stencil_typedefs = {'Stencil_phase_T': stencil_phase, 'CommStencil_phase_T': comm_stencil_phase,
+                        'Stencil_hydro_T': stencil_hydro, 'CommStencil_hydro_T': comm_stencil_hydro,
+                        'Stencil_thermal_T': stencil_thermal, 'CommStencil_thermal_T': comm_stencil_thermal,
+                        'Full_Stencil_T': full_stencil}
+    field_typedefs = {'PdfField_phase_T': h,
+                      'PdfField_hydro_T': g,
+                      'PdfField_thermal_T': f,
+                      'VectorField_T': u,
+                      'ScalarField_T': C}
+
+    additional_code = f"""
+const char * StencilNamePhase = "{stencil_phase.name}";
+const char * StencilNameHydro = "{stencil_hydro.name}";
+const char * StencilNameThermal = "{stencil_thermal.name}";
+const char * CollisionSpacePhase = "{lbm_config_phase.method.name.lower()}";
+const char * CollisionSpaceHydro = "{lbm_config_hydro.method.name.lower()}";
+const char * CollisionSpaceThermal = "{config_thermal.method.name.lower()}";
+
+const char * fieldType = "{field_type}";
+const char * fieldTypePDFs = "{field_type_pdfs}";
+#if defined(WALBERLA_BUILD_WITH_GPU_SUPPORT)
+using GPUFieldPDFs = {"walberla::gpu::GPUField< float >" 
+    if field_type_pdfs == "float32" else "walberla::gpu::GPUField< double >"} ;
+using GPUField = walberla::gpu::GPUField< walberla::real_t >;
+#endif
+
+{"#define GENERATED_HEAT_SOURCE" if generate_with_heat_source else ""}
+
+#ifdef GENERATED_HEAT_SOURCE
+const bool GeneratedHeatSource = true;
+#else
+const bool GeneratedHeatSource = false;
+#endif
+"""
+
+    generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates,
+                   target=target, cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates,
+                   target=target, cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'initialize_thermal_distributions', f_updates,
+                   target=target, cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+
+    generate_sweep(ctx, 'phase_field_LB_step', allen_cahn_update_rule,
+                   field_swaps=[(h, h_tmp)],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_boundary(ctx, 'phase_field_LB_NoSlip', NoSlip(), method_phase,
+                      target=target, streaming_pattern='pull', cpu_openmp=openmp, data_type=field_type_pdfs)
+
+    generate_sweep(ctx, 'hydro_LB_step', hydro_lb_update_rule,
+                   field_swaps=[(g, g_tmp)],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec, max_threads=256)
+
+    velocity_wall = tuple([TypedSymbol('w_x', dtype=field_type)] + [0.0] * (stencil_hydro.D - 1))
+    generate_boundary(ctx, 'hydro_LB_NoSlip', NoSlip(), method_hydro,
+                      target=target, streaming_pattern='pull', cpu_openmp=openmp, data_type=field_type_pdfs)
+    generate_boundary(ctx, 'hydro_LB_UBB', UBB(velocity_wall, data_type=field_type), method_hydro,
+                      target=target, streaming_pattern='pull', cpu_openmp=openmp, data_type=field_type_pdfs)
+
+    if generate_with_heat_source:
+        generate_sweep(ctx, 'thermal_LB_step', thermal_lb_update_rule,
+                       field_swaps=[(f, f_tmp)],
+                       target=target,
+                       varying_parameters=vp, gpu_indexing_params=sweep_params,
+                       cpu_openmp=openmp, cpu_vectorize_info=cpu_vec,
+                       block_offset=block_offset)
+    else:
+        generate_sweep(ctx, 'thermal_LB_step', thermal_lb_update_rule,
+                       field_swaps=[(f, f_tmp)],
+                       target=target,
+                       varying_parameters=vp, gpu_indexing_params=sweep_params,
+                       cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+
+    generate_sweep(ctx, 'initialize_rk2', init_rk2, ghost_layers_to_include=1,
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'rk2_first_stage', rk2_assignments[0],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'rk2_second_stage', rk2_assignments[1],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+
+    generate_sweep(ctx, 'initialize_rk4', init_rk4,
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'rk4_first_stage', rk4_assignments[0],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'rk4_second_stage', rk4_assignments[1],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'rk4_third_stage', rk4_assignments[2],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+    generate_sweep(ctx, 'rk4_fourth_stage', rk4_assignments[3],
+                   target=target,
+                   varying_parameters=vp, gpu_indexing_params=sweep_params,
+                   cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+
+    dirichlet_bc_static = DiffusionDirichlet(TypedSymbol('T_c', dtype=field_type), u, data_type=field_type)
+    generate_boundary(ctx, 'thermal_LB_DiffusionDirichlet_static', dirichlet_bc_static, method_thermal,
+                      target=target, streaming_pattern='pull', cpu_openmp=openmp, data_type=field_type_pdfs)
+
+    dirichlet_bc_dynamic = DiffusionDirichlet(lambda *args: None, u, data_type=field_type)
+    diffusion_data_handler = DiffusionDirichletAdditionalDataHandler(stencil_thermal, dirichlet_bc_dynamic)
+    generate_boundary(ctx, 'thermal_LB_DiffusionDirichlet_dynamic', dirichlet_bc_dynamic, method_thermal,
+                      additional_data_handler=diffusion_data_handler,
+                      target=target, streaming_pattern='pull', cpu_openmp=openmp, data_type=field_type_pdfs)
+
+    generate_boundary(ctx, 'thermal_LB_NeumannByCopy', NeumannByCopy(), method_thermal,
+                      target=target, streaming_pattern='pull', cpu_openmp=openmp, data_type=field_type_pdfs)
+
+    # communication
+
+    generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
+                          streaming_pattern='pull', target=target)
+
+    generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
+                          streaming_pattern='pull', target=target)
+
+    generate_lb_pack_info(ctx, 'PackInfo_thermal_field_distributions', stencil_thermal, f,
+                          streaming_pattern='pull', target=target)
+
+    generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=target)
+    generate_pack_info_for_field(ctx, 'PackInfo_temperature_field', temperature, target=target)
+
+    pystencils_walberla.boundary.generate_boundary(ctx, 'ContactAngle', contact_angle,
+                                                   C.name, stencil_hydro, index_shape=[], target=target)
+
+    generate_info_header(ctx, 'GenDefines', stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
+                         additional_code=additional_code)
+
diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py
index efd86fe92043777b69620b99d138fc1331a98580..770557d14f34fac81e77ab2f8e48d667d10e89ae 100644
--- a/python/lbmpy_walberla/additional_data_handler.py
+++ b/python/lbmpy_walberla/additional_data_handler.py
@@ -9,7 +9,7 @@ try:
 except ImportError:
     from lbmpy.custom_code_nodes import MirroredStencilDirections
 from lbmpy.boundaries.boundaryconditions import LbBoundary
-from lbmpy.boundaries import ExtrapolationOutflow, FreeSlip, UBB
+from lbmpy.boundaries import ExtrapolationOutflow, FreeSlip, UBB, DiffusionDirichlet
 
 from pystencils_walberla.additional_data_handler import AdditionalDataHandler
 
@@ -228,3 +228,38 @@ class OutflowAdditionalDataHandler(AdditionalDataHandler):
         result['pdf_nd'] = ', '.join(pos)
 
         return result
+
+
+class DiffusionDirichletAdditionalDataHandler(AdditionalDataHandler):
+    def __init__(self, stencil, boundary_object):
+        assert isinstance(boundary_object, DiffusionDirichlet)
+        self._boundary_object = boundary_object
+        super(DiffusionDirichletAdditionalDataHandler, self).__init__(stencil=stencil)
+
+    @property
+    def constructor_arguments(self):
+        return ", std::function<real_t(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \
+               "diffusionCallback "
+
+    @property
+    def initialiser_list(self):
+        return "elementInitaliser(diffusionCallback),"
+
+    @property
+    def additional_arguments_for_fill_function(self):
+        return "blocks, "
+
+    @property
+    def additional_parameters_for_fill_function(self):
+        return " const shared_ptr<StructuredBlockForest> &blocks, "
+
+    def data_initialisation(self, *_):
+        init_list = ["real_t InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), "
+                     "blocks, *block);", "element.concentration = InitialisatonAdditionalData;"]
+
+        return "\n".join(init_list)
+
+    @property
+    def additional_member_variable(self):
+        return "std::function<real_t(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)> " \
+               "elementInitaliser; "
diff --git a/python/lbmpy_walberla/alternating_sweeps.py b/python/lbmpy_walberla/alternating_sweeps.py
index 444a2000adb65c3ad66bfc028f7bedcab4e60896..cc22d974e0279297eaa3e1aa796b8a40b4ecdcfa 100644
--- a/python/lbmpy_walberla/alternating_sweeps.py
+++ b/python/lbmpy_walberla/alternating_sweeps.py
@@ -67,6 +67,7 @@ def generate_alternating_lbm_sweep(generation_context, class_name, collision_rul
                                    inner_outer_split=False, ghost_layers_to_include=0,
                                    target=Target.CPU, data_type=None,
                                    cpu_openmp=None, cpu_vectorize_info=None, max_threads=None,
+                                   block_offset=False,
                                    **kernel_parameters):
     """Generates an Alternating lattice Boltzmann sweep class. This is in particular meant for
     in-place streaming patterns, but can of course also be used with two-fields patterns (why make it
@@ -91,6 +92,8 @@ def generate_alternating_lbm_sweep(generation_context, class_name, collision_rul
         cpu_openmp: if loops should use openMP or not.
         cpu_vectorize_info: dictionary containing necessary information for the usage of a SIMD instruction set.
         max_threads: only relevant for GPU kernels. Will be argument of `__launch_bounds__`.
+        block_offset: A tuple of TypedSymbols that will function as internal variable to store
+                      storage.getBlockCellBB(block).min())
         kernel_parameters: other parameters passed to the creation of a pystencils.CreateKernelConfig
     """
     config = config_from_context(generation_context, target=target, data_type=data_type, cpu_openmp=cpu_openmp,
@@ -124,4 +127,5 @@ def generate_alternating_lbm_sweep(generation_context, class_name, collision_rul
                              target=target, namespace=namespace,
                              field_swaps=field_swaps, varying_parameters=varying_parameters,
                              inner_outer_split=inner_outer_split, ghost_layers_to_include=ghost_layers_to_include,
-                             cpu_vectorize_info=vec_info, cpu_openmp=openmp, max_threads=max_threads)
+                             cpu_vectorize_info=vec_info, cpu_openmp=openmp, max_threads=max_threads,
+                             block_offset=block_offset)
diff --git a/python/pystencils_walberla/cmake_integration.py b/python/pystencils_walberla/cmake_integration.py
index 4d5654c08a1474b53852f643c2cf4249a12901db..a4970c6b0f290b419a7061caa83c2a4e6d231160 100644
--- a/python/pystencils_walberla/cmake_integration.py
+++ b/python/pystencils_walberla/cmake_integration.py
@@ -55,7 +55,7 @@ class CodeGeneration:
             written = set(os.path.realpath(f) for f in self.context.files_written)
             only_in_cmake = expected - written
             only_generated = written - expected
-            error_message = "Generated files (OUT_FILES) specified not correctly" \
+            error_message = "Generated files (OUT_FILES) specified not correctly " \
                             + "in cmake with 'waLBerla_generate_target_from_python'\n"
             if only_in_cmake:
                 error_message += "Files only specified in CMake {}\n".format(
diff --git a/python/pystencils_walberla/sweep.py b/python/pystencils_walberla/sweep.py
index c404618b21f79b0fcb5efb5a7d93a49ac327f86d..f6e190fded98f6063d5ccad4043daa34d3cb21da 100644
--- a/python/pystencils_walberla/sweep.py
+++ b/python/pystencils_walberla/sweep.py
@@ -1,10 +1,11 @@
+from collections.abc import Iterable
 from typing import Callable, Sequence
-
 from jinja2 import Environment, PackageLoader, StrictUndefined
 
 from pystencils import Target, Assignment
 from pystencils import Field, create_kernel, create_staggered_kernel
 from pystencils.astnodes import KernelFunction
+from pystencils.typing import numpy_name_to_c
 
 from pystencils_walberla.cmake_integration import CodeGenerationContext
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
@@ -16,6 +17,7 @@ def generate_sweep(generation_context: CodeGenerationContext, class_name: str, a
                    namespace: str = 'pystencils', field_swaps=(), staggered=False, varying_parameters=(),
                    inner_outer_split=False, ghost_layers_to_include=0,
                    target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None, max_threads=None,
+                   block_offset=False,
                    **create_kernel_params):
     """Generates a waLBerla sweep from a pystencils representation.
 
@@ -46,6 +48,8 @@ def generate_sweep(generation_context: CodeGenerationContext, class_name: str, a
         cpu_openmp: if loops should use openMP or not.
         cpu_vectorize_info: dictionary containing necessary information for the usage of a SIMD instruction set.
         max_threads: only relevant for GPU kernels. Will be argument of `__launch_bounds__`
+        block_offset: A tuple of TypedSymbols that will function as internal variable to store
+                      storage.getBlockCellBB(block).min())
         **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
     """
     if staggered:
@@ -70,13 +74,13 @@ def generate_sweep(generation_context: CodeGenerationContext, class_name: str, a
                              field_swaps=field_swaps, varying_parameters=varying_parameters,
                              inner_outer_split=inner_outer_split, ghost_layers_to_include=ghost_layers_to_include,
                              cpu_vectorize_info=config.cpu_vectorize_info,
-                             cpu_openmp=config.cpu_openmp, max_threads=max_threads)
+                             cpu_openmp=config.cpu_openmp, max_threads=max_threads, block_offset=block_offset)
 
 
 def generate_selective_sweep(generation_context, class_name, selection_tree, interface_mappings=(), target=None,
                              namespace='pystencils', field_swaps=(), varying_parameters=(),
                              inner_outer_split=False, ghost_layers_to_include=0,
-                             cpu_vectorize_info=None, cpu_openmp=False, max_threads=None):
+                             cpu_vectorize_info=None, cpu_openmp=False, max_threads=None, block_offset=False):
     """Generates a selective sweep from a kernel selection tree. A kernel selection tree consolidates multiple
     pystencils ASTs in a tree-like structure. See also module `pystencils_walberla.kernel_selection`.
 
@@ -94,7 +98,10 @@ def generate_selective_sweep(generation_context, class_name, selection_tree, int
         ghost_layers_to_include: see documentation of `generate_sweep`
         cpu_vectorize_info: Dictionary containing information about CPU vectorization applied to the kernels
         cpu_openmp: Whether or not CPU kernels use OpenMP parallelization
-        max_threads: only relevant for GPU kernels. Will be argument of `__launch_bounds__`
+        max_threads: only relevant for GPU kernels. Will be argument of `__launch_bounds__
+        block_offset: A tuple of TypedSymbols that will function as internal variable to store
+                      storage.getBlockCellBB(block).min())
+`
     """
     def to_name(f):
         return f.name if isinstance(f, Field) else f
@@ -121,6 +128,11 @@ def generate_selective_sweep(generation_context, class_name, selection_tree, int
 
     interface_spec = HighLevelInterfaceSpec(kernel_family.kernel_selection_parameters, interface_mappings)
 
+    parameters_to_ignore = None
+    if isinstance(block_offset, Iterable):
+        parameters_to_ignore = [b.name for b in block_offset]
+        block_offset = tuple((b.name, numpy_name_to_c(b.dtype.numpy_dtype.name)) for b in block_offset)
+
     jinja_context = {
         'kernel': kernel_family,
         'namespace': namespace,
@@ -133,7 +145,9 @@ def generate_selective_sweep(generation_context, class_name, selection_tree, int
         'generate_functor': True,
         'cpu_vectorize_info': cpu_vectorize_info,
         'cpu_openmp': cpu_openmp,
-        'max_threads': max_threads
+        'max_threads': max_threads,
+        'block_offset': block_offset,
+        'parameters_to_ignore': parameters_to_ignore
     }
     header = env.get_template("Sweep.tmpl.h").render(**jinja_context)
     source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context)
diff --git a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp
index 19b7b11ed507f8f068a3deb5908a1ca6fe867711..48440b5b4366204157b0486cadf5c14295dbfcc0 100644
--- a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp
+++ b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp
@@ -68,7 +68,7 @@ void {{class_name}}::pack(Direction dir, unsigned char * byte_buffer, IBlock * b
         {% endfor %}
 
         default:
-            WALBERLA_ASSERT(false);
+            return;
     }
 }
 
@@ -99,7 +99,7 @@ void {{class_name}}::unpack(Direction dir, unsigned char * byte_buffer, IBlock *
         {% endfor %}
 
         default:
-            WALBERLA_ASSERT(false);
+            return;
     }
 }
 
diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.cpp b/python/pystencils_walberla/templates/Sweep.tmpl.cpp
index 8f3e14e59074a2f483fe14c5f85eb3e352c0a836..bc2c97147954a7d4a599d65e2cf328e3605affc9 100644
--- a/python/pystencils_walberla/templates/Sweep.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Sweep.tmpl.cpp
@@ -57,6 +57,10 @@ namespace {{namespace}} {
 
 void {{class_name}}::run( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}} )
 {
+   {% if block_offset -%}
+   if (!this->configured_)
+      WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
+         {% endif %}
     {{kernel|generate_block_data_to_field_extraction|indent(4)}}
     {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }}
     {{kernel|generate_call(ghost_layers_to_include=ghost_layers_to_include, stream='stream')|indent(4)}}
@@ -70,6 +74,10 @@ void {{class_name}}::runOnCellInterval(
         | type_identifier_list -}}
 )
 {
+   {% if block_offset -%}
+   if (!this->configured_)
+      WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
+         {% endif %}
     CellInterval ci = globalCellInterval;
     CellInterval blockBB = blocks->getBlockCellBB( *block);
     blockBB.expand( ghostLayers );
diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.h b/python/pystencils_walberla/templates/Sweep.tmpl.h
index e0b773ab1b1ab656a8db81ae10459d01b84766a9..f0f802288e4dd3a8b5835c7f5ad4960892ad6145 100644
--- a/python/pystencils_walberla/templates/Sweep.tmpl.h
+++ b/python/pystencils_walberla/templates/Sweep.tmpl.h
@@ -61,105 +61,122 @@ namespace {{namespace}} {
 class {{class_name}}
 {
 public:
-    {{class_name}}( {{kernel|generate_constructor_parameters}} {%if inner_outer_split%}, const Cell & outerWidth=Cell(1, 1, 1){% endif %})
-        : {{ kernel|generate_constructor_initializer_list }}{%if inner_outer_split%}{% if kernel|generate_constructor_initializer_list|length %},{% endif %} outerWidth_(outerWidth){% endif %}
-    {};
-
-    {{ kernel| generate_destructor(class_name) |indent(4) }}
-
-    void run( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} );
-    
-    void runOnCellInterval(
-        {{- ["const shared_ptr<StructuredBlockStorage> & blocks", "const CellInterval & globalCellInterval", "cell_idx_t ghostLayers", "IBlock * block",
-             kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] 
-            | type_identifier_list -}}
-    );
-
-    {% if generate_functor %}
-    void operator() (
-        {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
-    )
-    {
-        run( {{- ["block", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []] | identifier_list -}} );
-    }
-    {% endif %}
-
-    static std::function<void (IBlock *)> getSweep(
-        {{- ["const shared_ptr<" + class_name + "> & kernel", kernel.kernel_selection_parameters] | type_identifier_list -}}                                                  
-    )
-    {
-        return [ {{- [ ['kernel'], kernel.kernel_selection_parameters ] | identifier_list -}} ] 
-               (IBlock * b) 
-               { kernel->run( {{- [ ['b'], kernel.kernel_selection_parameters] | identifier_list -}} ); };
-    }
-
-    static std::function<void (IBlock* {%- if target is equalto 'gpu' %}, gpuStream_t {% endif -%} )> getSweepOnCellInterval(
-        {{- ["const shared_ptr<" + class_name + "> & kernel", "const shared_ptr<StructuredBlockStorage> & blocks", "const CellInterval & globalCellInterval",
-             kernel.kernel_selection_parameters, 'cell_idx_t ghostLayers=1']
-            | type_identifier_list -}}
-    )
-    {
-        return [ {{- ["kernel", "blocks", "globalCellInterval", "ghostLayers", kernel.kernel_selection_parameters] | identifier_list -}} ]
-               (IBlock * b{%- if target is equalto 'gpu'%}, gpuStream_t stream = nullptr{% endif -%}) 
-               { kernel->runOnCellInterval(
-                    {{- ["blocks", "globalCellInterval", "ghostLayers", "b", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []]
-                        | identifier_list 
-                    -}}
-                ); };
-    }
-
-    std::function<void (IBlock *)> getSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
-    {
-        return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ] 
-               (IBlock * b) 
-               { this->run( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
-    }
-
-    std::function<void (IBlock *)> getSweepOnCellInterval(
-        {{- ["const shared_ptr<StructuredBlockStorage> & blocks", "const CellInterval & globalCellInterval",
-             interface_spec.high_level_args, 'cell_idx_t ghostLayers=1', ["gpuStream_t stream = nullptr"] if target == 'gpu' else []]
-            | type_identifier_list -}}
-    )
-    {
-        return [ {{- ["this", "blocks", "globalCellInterval", "ghostLayers", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
-               (IBlock * b) 
-               { this->runOnCellInterval( {{- ["blocks", "globalCellInterval", "ghostLayers", "b", interface_spec.mapping_codes, ["stream"] if target == 'gpu' else []] | identifier_list -}} ); };
-    }
-
-{% if inner_outer_split %}
-    void inner( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} );
-
-    void outer( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} );
-
-    std::function<void (IBlock *)> getInnerSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
-    {
-        return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ] 
-               (IBlock * b) 
-               { this->inner( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
-    }
-
-    std::function<void (IBlock *)> getOuterSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
-    {
-        return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ] 
-               (IBlock * b) 
-               { this->outer( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
-    }
-
-    void setOuterPriority(int priority) 
-    {
-        {%if target is equalto 'gpu'%}
-        parallelStreams_.setStreamPriority(priority);
-        {%endif%}
-    }
-    {{kernel|generate_members|indent(4)}}
+   {{class_name}}( {{kernel|generate_constructor_parameters(parameters_to_ignore=parameters_to_ignore)}} {%if inner_outer_split%}, const Cell & outerWidth=Cell(1, 1, 1){% endif %})
+     : {{ kernel|generate_constructor_initializer_list(parameters_to_ignore=parameters_to_ignore) }}{%if inner_outer_split%}{% if kernel|generate_constructor_initializer_list|length %},{% endif %} outerWidth_(outerWidth){% endif %}{%if block_offset%}, configured_(false){% endif %}
+   {};
+
+   {{ kernel| generate_destructor(class_name) |indent(3) }}
+
+   void run( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} );
+
+   void runOnCellInterval(
+   {{- ["const shared_ptr<StructuredBlockStorage> & blocks", "const CellInterval & globalCellInterval", "cell_idx_t ghostLayers", "IBlock * block",
+       kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []]
+      | type_identifier_list -}}
+   );
+
+   {% if generate_functor %}
+   void operator() ({{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
+   {
+     run( {{- ["block", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []] | identifier_list -}} );
+   }
+   {% endif %}
+
+   static std::function<void (IBlock *)> getSweep({{- ["const shared_ptr<" + class_name + "> & kernel", kernel.kernel_selection_parameters] | type_identifier_list -}})
+   {
+     return [ {{- [ ['kernel'], kernel.kernel_selection_parameters ] | identifier_list -}} ]
+            (IBlock * b)
+            { kernel->run( {{- [ ['b'], kernel.kernel_selection_parameters] | identifier_list -}} ); };
+   }
+
+   static std::function<void (IBlock* {%- if target is equalto 'gpu' %}, gpuStream_t {% endif -%} )> getSweepOnCellInterval(
+     {{- ["const shared_ptr<" + class_name + "> & kernel", "const shared_ptr<StructuredBlockStorage> & blocks", "const CellInterval & globalCellInterval",
+          kernel.kernel_selection_parameters, 'cell_idx_t ghostLayers=1']
+         | type_identifier_list -}}
+   )
+   {
+     return [ {{- ["kernel", "blocks", "globalCellInterval", "ghostLayers", kernel.kernel_selection_parameters] | identifier_list -}} ]
+            (IBlock * b{%- if target is equalto 'gpu'%}, gpuStream_t stream = nullptr{% endif -%})
+            { kernel->runOnCellInterval(
+                 {{- ["blocks", "globalCellInterval", "ghostLayers", "b", kernel.kernel_selection_parameters, ["stream"] if target == 'gpu' else []]
+                     | identifier_list
+                 -}}
+             ); };
+   }
+
+   std::function<void (IBlock *)> getSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+   {
+     return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
+            (IBlock * b)
+            { this->run( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
+   }
+
+   std::function<void (IBlock *)> getSweepOnCellInterval(
+     {{- ["const shared_ptr<StructuredBlockStorage> & blocks", "const CellInterval & globalCellInterval",
+          interface_spec.high_level_args, 'cell_idx_t ghostLayers=1', ["gpuStream_t stream = nullptr"] if target == 'gpu' else []]
+         | type_identifier_list -}}
+   )
+   {
+     return [ {{- ["this", "blocks", "globalCellInterval", "ghostLayers", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
+            (IBlock * b)
+            { this->runOnCellInterval( {{- ["blocks", "globalCellInterval", "ghostLayers", "b", interface_spec.mapping_codes, ["stream"] if target == 'gpu' else []] | identifier_list -}} ); };
+   }
+
+   {% if block_offset %}
+   void configure( const shared_ptr<StructuredBlockStorage> & blocks, IBlock * block )
+   {
+   Cell BlockCellBB = blocks->getBlockCellBB( *block).min();
+   {% for offset in block_offset -%}
+   {{offset[0]}}_ = {{offset[1]}}(BlockCellBB[{{loop.index0}}]);
+   {% endfor -%}
+   configured_ = true;
+   }
+   {% else %}
+   void configure( const shared_ptr<StructuredBlockStorage> & blocks, IBlock * block ){}
+   {% endif %}
+
+   {% if inner_outer_split %}
+   void inner( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} );
+
+   void outer( {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} );
+
+   std::function<void (IBlock *)> getInnerSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+   {
+     return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ]
+            (IBlock * b)
+            { this->inner( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
+   }
+
+   std::function<void (IBlock *)> getOuterSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
+   {
+     return [ {{- [ ['this'], interface_spec.high_level_args, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ]
+            (IBlock * b)
+            { this->outer( {{- [ ['b'], interface_spec.mapping_codes, ["stream"] if target == 'gpu' else [] ] | identifier_list -}} ); };
+   }
+
+   void setOuterPriority(int priority)
+   {
+     {%if target is equalto 'gpu'%}
+     parallelStreams_.setStreamPriority(priority);
+     {%endif%}
+   }
+
+   {{kernel|generate_members|indent(3)}}
 
 private:
-    {%if target is equalto 'gpu' -%} gpu::ParallelStreams parallelStreams_; {%- endif %}
 
-    Cell outerWidth_;
-    std::vector<CellInterval> layers_;
-{% else %}
-    {{ kernel|generate_members|indent(4) }}
+   {%if target is equalto 'gpu' %} gpu::ParallelStreams parallelStreams_; {% endif %}
+
+   Cell outerWidth_;
+   std::vector<CellInterval> layers_;
+   {% if block_offset -%}
+   bool configured_;
+   {% endif %}
+   {% else %}
+   {{ kernel|generate_members|indent(3) }}
+   {% if block_offset -%}
+   bool configured_;
+   {% endif %}
 {% endif %}
 };
 
diff --git a/src/core/timing/RemainingTimeLogger.h b/src/core/timing/RemainingTimeLogger.h
index a669f5a99852a3799ba3be802dc3d4755631d463..e6beecdcc691cc2c6ca76eee2bc7402840ab43fe 100644
--- a/src/core/timing/RemainingTimeLogger.h
+++ b/src/core/timing/RemainingTimeLogger.h
@@ -46,11 +46,12 @@ namespace timing
 class RemainingTimeLogger
 {
  public:
-   RemainingTimeLogger(const uint_t nrTimesteps, const real_t logIntervalInSec = 10, const int minOutputWidth = 8,
-                       const uint_t startTimestep = 0)
-      : logIntervalInSec_(logIntervalInSec), timestep_(startTimestep), nrTimesteps_(nrTimesteps),
-        minOutputWidth_(minOutputWidth)
-   { WALBERLA_UNUSED(minOutputWidth_); }
+   RemainingTimeLogger(const uint_t nrTimesteps, const real_t logIntervalInSec = 10,
+                       const int minOutputWidth = 8, const uint_t startTimestep = 0) :
+   logIntervalInSec_(logIntervalInSec), timestep_(startTimestep), nrTimesteps_(nrTimesteps), minOutputWidth_(minOutputWidth)
+   {
+      WALBERLA_UNUSED(minOutputWidth_);
+   }
 
    void operator()()
    {
@@ -80,7 +81,6 @@ class RemainingTimeLogger
             WALBERLA_LOG_INFO("Estimated Remaining Time: " << std::setw(minOutputWidth_) << std::right
                                                            << timing::timeToString(remainingTime));
          }
-
          timer_.start();
       }
    }
diff --git a/src/field/AddToStorage.h b/src/field/AddToStorage.h
index d1ef11d921e4e305ada51f590a0a1035be5d6ec5..14c7cd4a6e5951b13cfe277b4ebb5d8c2099a7ba 100644
--- a/src/field/AddToStorage.h
+++ b/src/field/AddToStorage.h
@@ -1,15 +1,15 @@
 //======================================================================================================================
 //
-//  This file is part of waLBerla. waLBerla is free software: you can 
+//  This file is part of waLBerla. waLBerla is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -375,7 +375,7 @@ struct Creator< GhostLayerField_T,
          this->dataHandling_ = dataHandling;
       }
    }
-   
+
    Creator( const std::string & identifier = std::string(),
             const Set<SUID> & requiredSelectors = Set<SUID>::emptySet(),
             const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),
@@ -420,7 +420,7 @@ struct Creator< FlagField<T> > : public domain_decomposition::BlockDataCreator<
          this->dataHandling_ = dataHandling;
       }
    }
-   
+
    Creator( const std::string & identifier = std::string(),
             const Set<SUID> & requiredSelectors = Set<SUID>::emptySet(),
             const Set<SUID> & incompatibleSelectors = Set<SUID>::emptySet(),