diff --git a/AUTHORS.txt b/AUTHORS.txt
index ea429d78e53cf8ea4c0e8ee5f21521c6a760fdc7..fd6d6505b898fa66ff366a2b40b7bda45b0adc4e 100644
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -1,6 +1,7 @@
 List of contributors
 ====================
 
+Cameron Stewart
 Christian Feichtinger
 Christian Godenschwager
 Christoph Rettinger
@@ -12,14 +13,20 @@ Dominik Bartuschat
 Ehsan Fattahi
 Felix Winterhalter
 Florian Schornbaum
+Helen Schottenhamml
 Jan Götz
+Jan Hönig
+João Victor Tozatti Risso
 Johannes Habich
 Klaus Iglberger
 Kristina Pickl
 Lorenz Hufnagel
+Lukas Werner
+Markus Holzer
 Martin Bauer
 Matthias Markl
 Michael Kuron
+Nils Kohl
 Paulo Carvalho
 Regina Ammer
 Sagar Dolas
@@ -27,7 +34,9 @@ Sebastian Eibl
 Silke Bergler
 Simon Bogner
 Stefan Donath
+Stephan Seitz
 Sunil Kontham
+Tobias Leemann
 Tobias Preclik
 Tobias Scharpff
 Tobias Schruff
diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 0e435719339d963c50a8eb26cdf0352071dac88e..5d392389249b53cabfa9fa9a0efcebe50ff89eda 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -14,9 +14,11 @@ add_subdirectory( PoiseuilleChannel )
 add_subdirectory( ProbeVsExtraMessage )
 add_subdirectory( SchaeferTurek )
 add_subdirectory( UniformGrid )
-if ( WALBERLA_BUILD_WITH_CODEGEN )
+if ( WALBERLA_BUILD_WITH_CODEGEN AND NOT WALBERLA_BUILD_WITH_CUDA )
 add_subdirectory( UniformGridGenerated )
+add_subdirectory( PhaseFieldAllenCahn )
 endif()
 if ( WALBERLA_BUILD_WITH_CUDA )
 add_subdirectory( UniformGridGPU )
-endif()
\ No newline at end of file
+add_subdirectory( PhaseFieldAllenCahn )
+endif()
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..629c9ec0a609c7312027d037f5504fc3a4d3ecb6
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
@@ -0,0 +1,35 @@
+waLBerla_link_files_to_builddir(*.prm)
+waLBerla_link_files_to_builddir(*.py)
+
+if (WALBERLA_BUILD_WITH_CUDA)
+    waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGenGPU
+            FILE multiphase_codegen.py
+            OUT_FILES initialize_phase_field_distributions.cu initialize_phase_field_distributions.h
+            initialize_velocity_based_distributions.cu initialize_velocity_based_distributions.h
+            phase_field_LB_step.cu phase_field_LB_step.h
+            hydro_LB_step.cu hydro_LB_step.h
+            PackInfo_phase_field_distributions.cu PackInfo_phase_field_distributions.h
+            PackInfo_phase_field.cu PackInfo_phase_field.h
+            PackInfo_velocity_based_distributions.cu PackInfo_velocity_based_distributions.h
+            GenDefines.h)
+
+    waLBerla_add_executable(NAME benchmark_multiphase
+            FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
+            DEPENDS blockforest core cuda field postprocessing lbm geometry timeloop gui BenchmarkPhaseFieldCodeGenGPU)
+else ()
+    waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGenCPU
+            FILE multiphase_codegen.py
+            OUT_FILES initialize_phase_field_distributions.cpp initialize_phase_field_distributions.h
+            initialize_velocity_based_distributions.cpp initialize_velocity_based_distributions.h
+            phase_field_LB_step.cpp phase_field_LB_step.h
+            hydro_LB_step.cpp hydro_LB_step.h
+            PackInfo_phase_field_distributions.cpp PackInfo_phase_field_distributions.h
+            PackInfo_phase_field.cpp PackInfo_phase_field.h
+            PackInfo_velocity_based_distributions.cpp PackInfo_velocity_based_distributions.h
+            GenDefines.h)
+
+    waLBerla_add_executable(NAME benchmark_multiphase
+            FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
+            DEPENDS blockforest core field postprocessing lbm geometry timeloop gui BenchmarkPhaseFieldCodeGenCPU)
+endif (WALBERLA_BUILD_WITH_CUDA)
+
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/InitializerFunctions.cpp b/apps/benchmarks/PhaseFieldAllenCahn/InitializerFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..45e91e25a4e7190e16ef0fe53d78d6ed9d6cde89
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/InitializerFunctions.cpp
@@ -0,0 +1,73 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+
+namespace walberla
+{
+using PhaseField_T = GhostLayerField< real_t, 1 >;
+
+void initPhaseField_bubble(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                           const real_t R                         = 10,
+                           const Vector3< real_t > bubbleMidPoint = Vector3< real_t >(0.0, 0.0, 0.0),
+                           const real_t W                         = 5)
+{
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< PhaseField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t Ri = sqrt((globalCell[0] - bubbleMidPoint[0]) * (globalCell[0] - bubbleMidPoint[0]) +
+                          (globalCell[1] - bubbleMidPoint[1]) * (globalCell[1] - bubbleMidPoint[1]) +
+                          (globalCell[2] - bubbleMidPoint[2]) * (globalCell[2] - bubbleMidPoint[2]));
+         phaseField->get(x, y, z) = 0.5 + 0.5 * tanh(2.0 * (Ri - R) / W);
+      )
+      // clang-format on
+   }
+}
+
+void initPhaseField_RTI(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                        const real_t W = 5)
+{
+   auto X              = blocks->getDomainCellBB().xMax();
+   auto halfY          = (blocks->getDomainCellBB().yMax()) / 2.0;
+   double perturbation = 0.05;
+
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< PhaseField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t tmp = perturbation * X * (cos((2.0 * math::pi * globalCell[0]) / X) + cos((2.0 * math::pi * globalCell[2]) / X));
+         phaseField->get(x, y, z) = 0.5 + 0.5 * tanh(((globalCell[1] - halfY) - tmp) / (W / 2.0));
+      )
+      // clang-format on
+   }
+}
+} // namespace walberla
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/InitializerFunctions.h b/apps/benchmarks/PhaseFieldAllenCahn/InitializerFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..4ba7896e77a3a901ab8b221e88e33aff1036d649
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/InitializerFunctions.h
@@ -0,0 +1,39 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+#pragma once
+
+namespace walberla
+{
+void initPhaseField_bubble(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t R,
+                           Vector3< real_t > bubbleMidPoint, real_t W = 5);
+
+void initPhaseField_RTI(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t W = 5);
+
+} // namespace walberla
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
new file mode 100755
index 0000000000000000000000000000000000000000..2c447a3d88f3d331da8f4bc0fb7daa082584707e
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
@@ -0,0 +1,128 @@
+import os
+import waLBerla as wlb
+import pandas as pd
+
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+import sys
+
+
+class Scenario:
+    def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size):
+        # output frequencies
+        self.vtkWriteFrequency = 0
+
+        # simulation parameters
+        self.timesteps = 101
+        edge_size = 32
+        self.cells = (edge_size, edge_size, edge_size)
+        self.blocks = (1, 1, 1)
+        self.periodic = (1, 1, 1)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.timeStepStrategy = timeStepStrategy
+        self.overlappingWidth = overlappingWidth
+        self.cuda_block_size = cuda_block_size
+        self.warmupSteps = 10
+
+        # bubble parameters
+        self.bubbleRadius = min(self.size) // 4
+        self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
+
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+        self.config_dict = self.config()
+
+        self.csv_file = "benchmark.csv"
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'useGui': 0,
+                'remainingTimeLoggerFrequency': -1,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'gpuBlockSize': self.cuda_block_size,
+                'warmupSteps': self.warmupSteps,
+                'scenario': self.scenario,
+            },
+            'Boundaries_GPU': {
+                'Border': []
+            },
+            'Boundaries_CPU': {
+                'Border': []
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.bubbleMidPoint,
+                'bubbleRadius': self.bubbleRadius,
+            },
+        }
+
+    @wlb.member_callback
+    def results_callback(self, **kwargs):
+        data = {}
+        data.update(self.config_dict['Parameters'])
+        data.update(self.config_dict['DomainSetup'])
+        data.update(kwargs)
+        data['executable'] = sys.argv[0]
+        data['compile_flags'] = wlb.build_info.compiler_flags
+        data['walberla_version'] = wlb.build_info.version
+        data['build_machine'] = wlb.build_info.build_machine
+        sequenceValuesToScalars(data)
+
+        df = pd.DataFrame.from_records([data])
+        if not os.path.isfile(self.csv_file):
+            df.to_csv(self.csv_file, index=False)
+        else:
+            df.to_csv(self.csv_file, index=False, mode='a', header=False)
+
+
+def overlap_benchmark():
+    scenarios = wlb.ScenarioManager()
+    overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
+                         (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
+                         (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
+
+    cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
+                   (32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
+                   (32, 4, 1), (64, 4, 1), (128, 4, 1),
+                   (32, 4, 2), (64, 4, 2), (128, 2, 2),
+                   (32, 8, 1), (64, 8, 1), (64, 4, 2),
+                   (32, 16, 1), (16, 16, 1), (16, 16, 2)]
+
+    # no overlap
+    scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
+
+    # overlap
+    for overlap_strategy in ['overlap']:
+        for overlappingWidth in overlappingWidths:
+            for cuda_block in cuda_blocks:
+                scenario = Scenario(timeStepStrategy=overlap_strategy,
+                                    overlappingWidth=overlappingWidth,
+                                    cuda_block_size=cuda_block)
+                scenarios.add(scenario)
+
+
+def kernel_benchmark():
+    scenarios = wlb.ScenarioManager()
+
+    # overlap
+    # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
+    for overlap_strategy in ['overlap']:
+        scenario = Scenario(timeStepStrategy=overlap_strategy,
+                            overlappingWidth=(1, 1, 1),
+                            cuda_block_size=(128, 1, 1))
+        scenarios.add(scenario)
+
+
+# overlap_benchmark()
+kernel_benchmark()
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py
new file mode 100755
index 0000000000000000000000000000000000000000..e4fc118efaaaf6650ad0c756928f21796003bd2b
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py
@@ -0,0 +1,116 @@
+import os
+import waLBerla as wlb
+import pandas as pd
+
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+import sys
+
+
+class Scenario:
+    def __init__(self, timeStepStrategy, overlappingWidth):
+        # output frequencies
+        self.vtkWriteFrequency = -1
+
+        # simulation parameters
+        self.timesteps = 500
+        edge_size = 50
+        self.cells = (edge_size, edge_size, edge_size)
+        self.blocks = (1, 1, 1)
+        self.periodic = (1, 1, 1)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.timeStepStrategy = timeStepStrategy
+        self.overlappingWidth = overlappingWidth
+        self.cuda_block_size = (-1, -1, -1)
+        self.warmupSteps = 10
+
+        # bubble parameters
+        self.bubbleRadius = min(self.size) // 4
+        self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
+
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+        self.config_dict = self.config()
+
+        self.csv_file = "benchmark_cpu.csv"
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'useGui': 0,
+                'remainingTimeLoggerFrequency': 10.0,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'gpuBlockSize': self.cuda_block_size,
+                'warmupSteps': self.warmupSteps,
+                'scenario': self.scenario,
+            },
+            'Boundaries_GPU': {
+                'Border': []
+            },
+            'Boundaries_CPU': {
+                'Border': []
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.bubbleMidPoint,
+                'bubbleRadius': self.bubbleRadius,
+            },
+        }
+
+    @wlb.member_callback
+    def results_callback(self, **kwargs):
+        data = {}
+        data.update(self.config_dict['Parameters'])
+        data.update(self.config_dict['DomainSetup'])
+        data.update(kwargs)
+        data['executable'] = sys.argv[0]
+        data['compile_flags'] = wlb.build_info.compiler_flags
+        data['walberla_version'] = wlb.build_info.version
+        data['build_machine'] = wlb.build_info.build_machine
+        sequenceValuesToScalars(data)
+
+        df = pd.DataFrame.from_records([data])
+        if not os.path.isfile(self.csv_file):
+            df.to_csv(self.csv_file, index=False)
+        else:
+            df.to_csv(self.csv_file, index=False, mode='a', header=False)
+
+
+def overlap_benchmark():
+    scenarios = wlb.ScenarioManager()
+    overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
+                         (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
+                         (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32),
+                         (64, 32, 32), (64, 64, 32), (64, 64, 64)]
+    # no overlap
+    scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1)))
+
+    # overlap
+    for overlap_strategy in ['overlap']:
+        for overlappingWidth in overlappingWidths:
+            scenario = Scenario(timeStepStrategy=overlap_strategy,
+                                overlappingWidth=overlappingWidth)
+            scenarios.add(scenario)
+
+
+def kernel_benchmark():
+    scenarios = wlb.ScenarioManager()
+
+    # overlap
+    scenario = Scenario(timeStepStrategy='overlap',
+                        overlappingWidth=(8, 8, 8))
+    scenarios.add(scenario)
+
+
+# overlap_benchmark()
+kernel_benchmark()
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..944ca0a0b2febc784c95128cedb4ba2386c20193
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
@@ -0,0 +1,416 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file benchmark_multiphase.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformDirectScheme.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/python/Exports.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/DictWrapper.h"
+#include "python_coupling/PythonCallback.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include <blockforest/communication/UniformBufferedScheme.h>
+
+#include "InitializerFunctions.h"
+
+//////////////////////////////
+// INCLUDE GENERATED FILES //
+////////////////////////////
+
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+#   include "cuda/AddGPUFieldToStorage.h"
+#   include "cuda/DeviceSelectMPI.h"
+#   include "cuda/HostFieldAllocator.h"
+#   include "cuda/NVTX.h"
+#   include "cuda/ParallelStreams.h"
+#   include "cuda/communication/GPUPackInfo.h"
+#   include "cuda/communication/MemcpyPackInfo.h"
+#   include "cuda/communication/UniformGPUScheme.h"
+
+#   include "GenDefines.h"
+#   include "PackInfo_phase_field.h"
+#   include "PackInfo_phase_field_distributions.h"
+#   include "PackInfo_velocity_based_distributions.h"
+#   include "hydro_LB_step.h"
+#   include "initialize_phase_field_distributions.h"
+#   include "initialize_velocity_based_distributions.h"
+#   include "phase_field_LB_step.h"
+
+#else
+#   include "GenDefines.h"
+#   include "PackInfo_phase_field.h"
+#   include "PackInfo_phase_field_distributions.h"
+#   include "PackInfo_velocity_based_distributions.h"
+#   include "hydro_LB_step.h"
+#   include "initialize_phase_field_distributions.h"
+#   include "initialize_velocity_based_distributions.h"
+#   include "phase_field_LB_step.h"
+#endif
+
+using namespace walberla;
+
+using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >;
+using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >;
+using VelocityField_T  = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
+using PhaseField_T     = GhostLayerField< real_t, 1 >;
+using flag_t           = walberla::uint8_t;
+using FlagField_T      = FlagField< flag_t >;
+
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+typedef cuda::GPUField< double > GPUField;
+#endif
+// using CommScheme_T = cuda::communication::UniformGPUScheme<stencil::D2Q9>;
+
+int main(int argc, char** argv)
+{
+   mpi::Environment env(argc, argv);
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+   cuda::selectDeviceBasedOnMpiRank();
+#endif
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      WALBERLA_MPI_WORLD_BARRIER()
+
+      auto config = *cfg;
+      logging::configureLogging(config);
+      shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config);
+
+      Vector3< uint_t > cellsPerBlock =
+         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
+      // Reading parameters
+      auto parameters                    = config->getOneBlock("Parameters");
+      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
+      const uint_t timesteps             = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+      const real_t remainingTimeLoggerFrequency =
+         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
+      const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1));
+      Vector3< int > overlappingWidth =
+         parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1));
+      const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
+
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      // CPU fields
+      BlockDataID vel_field   = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
+      BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx);
+      // GPU fields
+      BlockDataID lb_phase_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >(
+         blocks, "lb phase field on GPU", Stencil_phase_T::Size, field::fzyx, 1);
+      BlockDataID lb_velocity_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >(
+         blocks, "lb velocity field on GPU", Stencil_hydro_T::Size, field::fzyx, 1);
+      BlockDataID vel_field_gpu =
+         cuda::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true);
+      BlockDataID phase_field_gpu =
+         cuda::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true);
+#else
+      BlockDataID lb_phase_field =
+         field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_t(0), field::fzyx);
+      BlockDataID lb_velocity_field =
+         field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_t(0), field::fzyx);
+      BlockDataID vel_field   = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
+      BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx);
+#endif
+
+      if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only")
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("initialization of the phase field")
+         if (scenario == 1)
+         {
+            auto bubbleParameters = config->getOneBlock("Bubble");
+            const Vector3< real_t > bubbleMidPoint =
+               bubbleParameters.getParameter< Vector3< real_t > >("bubbleMidPoint");
+            const real_t bubbleRadius = bubbleParameters.getParameter< real_t >("bubbleRadius", 20.0);
+            initPhaseField_bubble(blocks, phase_field, bubbleRadius, bubbleMidPoint);
+         }
+         else if (scenario == 2)
+         {
+            initPhaseField_RTI(blocks, phase_field);
+         }
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+         cuda::fieldCpy< GPUField, PhaseField_T >(blocks, phase_field_gpu, phase_field);
+#endif
+         WALBERLA_LOG_INFO_ON_ROOT("initialization of the phase field done")
+      }
+
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      Vector3< int32_t > gpuBlockSize =
+         parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
+      pystencils::initialize_phase_field_distributions init_h(lb_phase_field_gpu, phase_field_gpu, vel_field_gpu);
+      pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);
+
+      pystencils::phase_field_LB_step phase_field_LB_step(
+         lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1],
+         Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+      pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0],
+                                              gpuBlockSize[1],
+                                              Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+#else
+      pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
+      pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);
+      pystencils::phase_field_LB_step phase_field_LB_step(
+         lb_phase_field, phase_field, vel_field, Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+      pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field,
+                                              Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+#endif
+
+// add communication
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      auto Comm_velocity_based_distributions =
+         make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      auto generatedPackInfo_velocity_based_distributions =
+         make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
+      Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+      auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
+      Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
+
+      auto Comm_phase_field_distributions =
+         make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      auto generatedPackInfo_phase_field_distributions =
+         make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
+      Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+#else
+
+
+      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
+
+      auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
+      auto generatedPackInfo_velocity_based_distributions =
+         make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field);
+
+      Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
+      Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);
+
+      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
+      auto generatedPackInfo_phase_field_distributions =
+         make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field);
+      Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
+#endif
+
+      BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+      // Boundaries
+      const FlagUID fluidFlagUID("Fluid");
+      auto boundariesConfig = config->getBlock("Boundaries_GPU");
+      if (boundariesConfig)
+      {
+         geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
+         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
+      }
+
+      // initialize the two lattice Boltzmann fields
+      if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only")
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions")
+         for (auto& block : *blocks)
+         {
+            init_h(&block);
+            init_g(&block);
+         }
+         WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions done")
+      }
+
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      int streamLowPriority  = 0;
+      int streamHighPriority = 0;
+      auto defaultStream     = cuda::StreamRAII::newPriorityStream(streamLowPriority);
+      auto innerOuterStreams = cuda::ParallelStreams(streamHighPriority);
+#endif
+
+      auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      auto normalTimeStep = [&]() {
+         for (auto& block : *blocks)
+         {
+            Comm_phase_field_distributions->communicate(nullptr);
+            phase_field_LB_step(&block);
+
+            Comm_velocity_based_distributions->communicate(nullptr);
+            hydro_LB_step(&block);
+         }
+      };
+      auto simpleOverlapTimeStep = [&]() {
+         Comm_phase_field_distributions->startCommunication(defaultStream);
+         for (auto& block : *blocks)
+            phase_field_LB_step.inner(&block, defaultStream);
+         Comm_phase_field_distributions->wait(defaultStream);
+         for (auto& block : *blocks)
+            phase_field_LB_step.outer(&block, defaultStream);
+
+         Comm_velocity_based_distributions->startCommunication(defaultStream);
+         for (auto& block : *blocks)
+            hydro_LB_step.inner(&block, defaultStream);
+         Comm_velocity_based_distributions->wait(defaultStream);
+         for (auto& block : *blocks)
+            hydro_LB_step.outer(&block, defaultStream);
+      };
+      auto phase_only = [&]() {
+         for (auto& block : *blocks)
+            phase_field_LB_step(&block);
+      };
+      auto hydro_only = [&]() {
+         for (auto& block : *blocks)
+            hydro_LB_step(&block);
+      };
+      auto without_comm = [&]() {
+         for (auto& block : *blocks)
+            phase_field_LB_step(&block);
+         for (auto& block : *blocks)
+            hydro_LB_step(&block);
+      };
+#else
+      auto normalTimeStep = [&]() {
+         for (auto& block : *blocks)
+         {
+            Comm_phase_field_distributions();
+            phase_field_LB_step(&block);
+
+            Comm_velocity_based_distributions();
+            hydro_LB_step(&block);
+         }
+      };
+      auto simpleOverlapTimeStep = [&]() {
+        Comm_phase_field_distributions.startCommunication();
+        for (auto& block : *blocks)
+           phase_field_LB_step.inner(&block);
+        Comm_phase_field_distributions.wait();
+        for (auto& block : *blocks)
+           phase_field_LB_step.outer(&block);
+
+        Comm_velocity_based_distributions.startCommunication();
+        for (auto& block : *blocks)
+           hydro_LB_step.inner(&block);
+        Comm_velocity_based_distributions.wait();
+        for (auto& block : *blocks)
+           hydro_LB_step.outer(&block);
+      };
+      auto phase_only = [&]() {
+         for (auto& block : *blocks)
+            phase_field_LB_step(&block);
+      };
+      auto hydro_only = [&]() {
+         for (auto& block : *blocks)
+            hydro_LB_step(&block);
+      };
+      auto without_comm = [&]() {
+         for (auto& block : *blocks)
+            phase_field_LB_step(&block);
+         for (auto& block : *blocks)
+            hydro_LB_step(&block);
+      };
+#endif
+      std::function< void() > timeStep;
+      if (timeStepStrategy == "overlap")
+      {
+         timeStep = std::function< void() >(simpleOverlapTimeStep);
+         WALBERLA_LOG_INFO_ON_ROOT("overlapping timestep")
+      }
+      else if (timeStepStrategy == "phase_only")
+      {
+         timeStep = std::function< void() >(phase_only);
+         WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking")
+      }
+      else if (timeStepStrategy == "hydro_only")
+      {
+         timeStep = std::function< void() >(hydro_only);
+         WALBERLA_LOG_INFO_ON_ROOT("started only hydro step without communication for benchmarking")
+      }
+      else if (timeStepStrategy == "kernel_only")
+      {
+         timeStep = std::function< void() >(without_comm);
+         WALBERLA_LOG_INFO_ON_ROOT("started complete phasefield model without communication for benchmarking")
+      }
+      else
+      {
+         timeStep = std::function< void() >(normalTimeStep);
+         WALBERLA_LOG_INFO_ON_ROOT("normal timestep with no overlapping")
+      }
+
+      timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
+
+      // remaining time logger
+      if (remainingTimeLoggerFrequency > 0)
+         timeLoop->addFuncAfterTimeStep(
+            timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+            "remaining time logger");
+
+      uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      if (vtkWriteFrequency > 1)
+      {
+         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                         "simulation_step", false, true, true, false, 0);
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+         vtkOutput->addBeforeFunction([&]() {
+            cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu);
+            // cuda::fieldCpy<VelocityField_T, GPUField>( blocks, vel_field, vel_field_gpu );
+         });
+#endif
+         auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase");
+         vtkOutput->addCellDataWriter(phaseWriter);
+
+         timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }
+
+      for (uint_t i = 0; i < warmupSteps; ++i)
+         timeLoop->singleStep();
+
+      timeLoop->setCurrentTimeStepToZero();
+      WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+      WcTimer simTimer;
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      cudaDeviceSynchronize();
+#endif
+      simTimer.start();
+      timeLoop->run();
+#if defined(WALBERLA_BUILD_WITH_CUDA)
+      cudaDeviceSynchronize();
+#endif
+      simTimer.end();
+      WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+      auto time            = simTimer.last();
+      auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+      auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
+      WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess)
+      WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s")
+      WALBERLA_ROOT_SECTION()
+      {
+         python_coupling::PythonCallback pythonCallbackResults("results_callback");
+         if (pythonCallbackResults.isCallable())
+         {
+            pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
+            // Call Python function to report results
+            pythonCallbackResults();
+         }
+      }
+   }
+
+   return EXIT_SUCCESS;
+}
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_piz_daint.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_piz_daint.py
new file mode 100755
index 0000000000000000000000000000000000000000..816d74b2787e1ca718ce7454be06b8f818011593
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_piz_daint.py
@@ -0,0 +1,149 @@
+import os
+import waLBerla as wlb
+import csv
+
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+
+import sys
+
+
+class Scenario:
+    def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size):
+        # output frequencies
+        self.vtkWriteFrequency = 0
+
+        # simulation parameters
+        self.timesteps = 201
+        edge_size = 100
+        self.cells = (edge_size, edge_size, edge_size)
+        self.blocks = (1, 1, 1)
+        self.periodic = (1, 1, 1)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.timeStepStrategy = timeStepStrategy
+        self.overlappingWidth = overlappingWidth
+        self.cuda_block_size = cuda_block_size
+        self.warmupSteps = 20
+
+        # bubble parameters
+        self.bubbleRadius = min(self.size) // 4
+        self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
+
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+        self.config_dict = self.config()
+
+        self.csv_file = "benchmark_piz_daint.csv"
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'useGui': 0,
+                'remainingTimeLoggerFrequency': -1,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'gpuBlockSize': self.cuda_block_size,
+                'warmupSteps': self.warmupSteps,
+                'scenario': self.scenario,
+            },
+            'Boundaries_GPU': {
+                'Border': []
+            },
+            'Boundaries_CPU': {
+                'Border': []
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.bubbleMidPoint,
+                'bubbleRadius': self.bubbleRadius,
+            },
+        }
+
+    @wlb.member_callback
+    def results_callback(self, **kwargs):
+        data = {}
+        data.update(self.config_dict['Parameters'])
+        data.update(self.config_dict['DomainSetup'])
+        data.update(kwargs)
+        data['executable'] = sys.argv[0]
+        data['compile_flags'] = wlb.build_info.compiler_flags
+        data['walberla_version'] = wlb.build_info.version
+        data['build_machine'] = wlb.build_info.build_machine
+        sequenceValuesToScalars(data)
+
+        if not os.path.isfile(self.csv_file):
+            try:
+                with open(self.csv_file, 'w') as csvfile:
+                    writer = csv.DictWriter(csvfile, fieldnames=data)
+                    writer.writeheader()
+                    writer.writerow(data)
+            except IOError:
+                print("could not create csv file")
+        else:
+            try:
+                with open(self.csv_file, 'a') as csvfile:
+                    writer = csv.DictWriter(csvfile, fieldnames=data)
+                    writer.writerow(data)
+            except IOError:
+                print("could not write to csv file")
+
+
+def overlap_benchmark():
+    scenarios = wlb.ScenarioManager()
+    overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
+                         (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
+                         (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
+
+    cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
+                   (32, 2, 1), (64, 2, 1), (128, 2, 1),
+                   (32, 4, 1), (64, 4, 1),
+                   (32, 4, 2), (32, 8, 1), (16, 16, 1)]
+
+    # no overlap
+    scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
+
+    # overlap
+    for overlap_strategy in ['overlap']:
+        for overlappingWidth in overlappingWidths:
+            for cuda_block in cuda_blocks:
+                scenario = Scenario(timeStepStrategy=overlap_strategy,
+                                    overlappingWidth=overlappingWidth,
+                                    cuda_block_size=cuda_block)
+                scenarios.add(scenario)
+
+
+def kernel_benchmark():
+    scenarios = wlb.ScenarioManager()
+
+    # overlap
+    # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
+    for overlap_strategy in ['overlap']:
+        scenario = Scenario(timeStepStrategy=overlap_strategy,
+                            overlappingWidth=(16, 16, 16),
+                            cuda_block_size=(128, 4, 1))
+        scenarios.add(scenario)
+
+
+def weak_scaling():
+    scenarios = wlb.ScenarioManager()
+
+    # overlap
+    # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
+    for overlap_strategy in ['overlap']:
+        scenario = Scenario(timeStepStrategy=overlap_strategy,
+                            overlappingWidth=(32, 32, 32),
+                            cuda_block_size=(128, 2, 1))
+        scenarios.add(scenario)
+
+
+# overlap_benchmark()
+# kernel_benchmark()
+weak_scaling()
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
new file mode 100644
index 0000000000000000000000000000000000000000..d46056d8207c3892c98b1e8c11f5af2f8383a6ab
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
@@ -0,0 +1,211 @@
+from pystencils import fields, TypedSymbol
+from pystencils.simp import sympy_cse
+from pystencils import AssignmentCollection
+
+from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
+from lbmpy.stencils import get_stencil
+
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
+
+from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
+    initializer_kernel_hydro_lb, interface_tracking_force, \
+    hydrodynamic_force, get_collision_assignments_hydro
+
+from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
+
+import numpy as np
+
+stencil_phase = get_stencil("D3Q15", "walberla")
+stencil_hydro = get_stencil("D3Q27", "walberla")
+q_phase = len(stencil_phase)
+q_hydro = len(stencil_hydro)
+
+maxwellian_moments = False
+assert (len(stencil_phase[0]) == len(stencil_hydro[0]))
+dimensions = len(stencil_hydro[0])
+
+########################
+# PARAMETER DEFINITION #
+########################
+
+density_liquid = 1.0
+density_gas = 0.001
+surface_tension = 0.0001
+M = 0.02
+
+# phase-field parameter
+drho3 = (density_liquid - density_gas) / 3
+# interface thickness
+W = 5
+# coefficient related to surface tension
+beta = 12.0 * (surface_tension / W)
+# coefficient related to surface tension
+kappa = 1.5 * surface_tension * W
+# relaxation rate allen cahn (h)
+w_c = 1.0 / (0.5 + (3.0 * M))
+
+########################
+# FIELDS #
+########################
+
+# velocity field
+u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
+# phase-field
+C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
+
+# phase-field distribution functions
+h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
+h_tmp = fields(f"lb_phase_field_tmp({q_phase}): [{dimensions}D]", layout='fzyx')
+# hydrodynamic distribution functions
+g = fields(f"lb_velocity_field({q_hydro}): [{dimensions}D]", layout='fzyx')
+g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): [{dimensions}D]", layout='fzyx')
+
+########################################
+# RELAXATION RATES AND EXTERNAL FORCES #
+########################################
+
+# calculate the relaxation rate for the hydro lb as well as the body force
+density = density_gas + C.center * (density_liquid - density_gas)
+
+# force acting on all phases of the model
+body_force = np.array([0, 1e-6, 0])
+
+relaxation_time = 0.03 + 0.5
+relaxation_rate = 1.0 / relaxation_time
+
+###############
+# LBM METHODS #
+###############
+
+method_phase = create_lb_method(stencil=stencil_phase, method='srt', relaxation_rate=w_c, compressible=True)
+
+method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True,
+                                relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1],
+                                maxwellian_moments=maxwellian_moments)
+
+# create the kernels for the initialization of the g and h field
+h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
+g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
+
+
+force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W)]
+force_model_h = MultiphaseForceModel(force=force_h)
+
+force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force)
+
+h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)]
+sum_h = np.sum(h_tmp_symbol_list[:])
+
+####################
+# LBM UPDATE RULES #
+####################
+
+method_phase.set_force_model(force_model_h)
+
+phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
+                                            velocity_input=u,
+                                            compressible=True,
+                                            optimization={"symbolic_field": h,
+                                                          "symbolic_temporary_field": h_tmp},
+                                            kernel_type='stream_pull_collide')
+
+
+phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
+
+phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
+                                           subexpressions=phase_field_LB_step.subexpressions)
+phase_field_LB_step = sympy_cse(phase_field_LB_step)
+
+# ---------------------------------------------------------------------------------------------------------
+
+hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
+                                                density=density,
+                                                velocity_input=u,
+                                                force=force_g,
+                                                optimization={"symbolic_field": g,
+                                                              "symbolic_temporary_field": g_tmp},
+                                                kernel_type='collide_stream_push')
+
+# streaming of the hydrodynamic distribution
+stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
+                                     optimization={"symbolic_field": g,
+                                                   "symbolic_temporary_field": g_tmp},
+                                     kernel_type='stream_pull_only')
+
+###################
+# GENERATE SWEEPS #
+###################
+
+cpu_vec = {'instruction_set': 'sse', 'assume_inner_stride_one': True, 'nontemporal': True}
+
+vp = [('int32_t', 'cudaBlockSize0'),
+      ('int32_t', 'cudaBlockSize1')]
+
+sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
+                    TypedSymbol("cudaBlockSize1", np.int32),
+                    1)
+sweep_params = {'block_size': sweep_block_size}
+
+info_header = f"""
+#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase}; 
+#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro};
+"""
+
+with CodeGeneration() as ctx:
+    if ctx.cuda is False:
+        if not ctx.optimize_for_localhost:
+            cpu_vec = {'instruction_set': None}
+
+        generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates)
+        generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
+
+        generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
+                       field_swaps=[(h, h_tmp)],
+                       inner_outer_split=True,
+                       cpu_vectorize_info=cpu_vec)
+
+        generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
+                       field_swaps=[(g, g_tmp)],
+                       inner_outer_split=True,
+                       cpu_vectorize_info=cpu_vec)
+
+        # communication
+        generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
+                                       phase_field_LB_step.main_assignments, target='cpu')
+        generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
+                                       hydro_LB_step.all_assignments, target='cpu', kind='pull')
+        generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
+                                       hydro_LB_step.all_assignments, target='cpu', kind='push')
+
+        ctx.write_file("GenDefines.h", info_header)
+
+    if ctx.cuda:
+        generate_sweep(ctx, 'initialize_phase_field_distributions',
+                       h_updates, target='gpu')
+        generate_sweep(ctx, 'initialize_velocity_based_distributions',
+                       g_updates, target='gpu')
+
+        generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
+                       field_swaps=[(h, h_tmp)],
+                       inner_outer_split=True,
+                       target='gpu',
+                       gpu_indexing_params=sweep_params,
+                       varying_parameters=vp)
+
+        generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
+                       field_swaps=[(g, g_tmp)],
+                       inner_outer_split=True,
+                       target='gpu',
+                       gpu_indexing_params=sweep_params,
+                       varying_parameters=vp)
+        # communication
+        generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
+                                       phase_field_LB_step.main_assignments, target='gpu')
+        generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
+                                       hydro_LB_step.all_assignments, target='gpu', kind='pull')
+        generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
+                                       hydro_LB_step.all_assignments, target='gpu', kind='push')
+
+        ctx.write_file("GenDefines.h", info_header)
+
+print("finished code generation successfully")
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/profiling.py b/apps/benchmarks/PhaseFieldAllenCahn/profiling.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d8c9375efdc6aea843b390a9c29d00d999e48a3
--- /dev/null
+++ b/apps/benchmarks/PhaseFieldAllenCahn/profiling.py
@@ -0,0 +1,66 @@
+import os
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 100
+
+        # simulation parameters
+        self.timesteps = 3
+        edge_size = 200
+        self.cells = (edge_size, edge_size, edge_size)
+        self.blocks = (1, 1, 1)
+        self.periodic = (1, 1, 1)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.timeStepStrategy = 'phase_only'
+        self.overlappingWidth = (1, 1, 1)
+        self.cuda_block_size = (128, 1, 1)
+
+        # bubble parameters
+        self.bubbleRadius = min(self.size) // 4
+        self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
+
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+        self.config_dict = self.config()
+
+        self.csv_file = "benchmark.csv"
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'useGui': 0,
+                'remainingTimeLoggerFrequency': -1,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'gpuBlockSize': self.cuda_block_size,
+                'warmupSteps': 0,
+                'scenario': self.scenario,
+            },
+            'Boundaries_GPU': {
+                'Border': []
+            },
+            'Boundaries_CPU': {
+                'Border': []
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.bubbleMidPoint,
+                'bubbleRadius': self.bubbleRadius,
+            },
+        }
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/CMakeLists.txt b/apps/showcases/CMakeLists.txt
index 266582c2283dca2e7d8dd44973a1e6517027fb46..cda00720ff23502b5664f43fe43c140be2d026ee 100644
--- a/apps/showcases/CMakeLists.txt
+++ b/apps/showcases/CMakeLists.txt
@@ -2,3 +2,6 @@ add_subdirectory( BidisperseFluidizedBed )
 add_subdirectory( CombinedResolvedUnresolved )
 add_subdirectory( HeatConduction )
 add_subdirectory( Mixer )
+if ( WALBERLA_BUILD_WITH_CODEGEN)
+add_subdirectory( PhaseFieldAllenCahn )
+endif()
diff --git a/apps/showcases/PhaseFieldAllenCahn/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed6d22966d3b63e776d5a1a02642cf1f4cf7bd07
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CMakeLists.txt
@@ -0,0 +1,7 @@
+if ( NOT WALBERLA_BUILD_WITH_CUDA)
+add_subdirectory( CPU )
+endif()
+
+if ( WALBERLA_BUILD_WITH_CUDA)
+add_subdirectory( GPU )
+endif()
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b155b2f5b06877094acf351dfe8cad8fc6d8eac0
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt
@@ -0,0 +1,21 @@
+waLBerla_link_files_to_builddir(*.prm)
+waLBerla_link_files_to_builddir(*.py)
+waLBerla_link_files_to_builddir(*.obj)
+
+waLBerla_generate_target_from_python(NAME PhaseFieldCodeGenCPU
+        FILE multiphase_codegen.py
+        OUT_FILES initialize_phase_field_distributions.cpp initialize_phase_field_distributions.h
+        initialize_velocity_based_distributions.cpp initialize_velocity_based_distributions.h
+        phase_field_LB_step.cpp phase_field_LB_step.h
+        phase_field_LB_NoSlip.cpp phase_field_LB_NoSlip.h
+        hydro_LB_step.cpp hydro_LB_step.h
+        hydro_LB_NoSlip.cpp hydro_LB_NoSlip.h
+        stream_hydro.cpp stream_hydro.h
+        GenDefines.h)
+
+waLBerla_add_executable(NAME multiphase
+        FILES multiphase.cpp PythonExports.cpp InitializerFunctions.cpp contact.cpp CalculateNormals.cpp multiphase_codegen.py
+        DEPENDS blockforest core field postprocessing lbm geometry timeloop gui PhaseFieldCodeGenCPU)
+
+
+
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/CalculateNormals.cpp b/apps/showcases/PhaseFieldAllenCahn/CPU/CalculateNormals.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e20053e38130262fdf2bb8c556f1fd665888a945
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/CalculateNormals.cpp
@@ -0,0 +1,83 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CalculateNormals.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "CalculateNormals.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+
+#include "field/FlagField.h"
+
+namespace walberla
+{
+using FlagField_T    = FlagField< uint8_t >;
+using NormalsField_T = GhostLayerField< int8_t, 3 >;
+
+void calculate_normals(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID normalsFieldID,
+                       ConstBlockDataID flagFieldID, FlagUID domainFlagUID, FlagUID boundaryFlagUID)
+{
+   for (auto& block : *blocks)
+   {
+      CellInterval globalCellBB = blocks->getBlockCellBB(block);
+      CellInterval blockLocalCellBB;
+      blocks->transformGlobalToBlockLocalCellInterval(blockLocalCellBB, block, globalCellBB);
+
+      auto* normalsField = block.getData< NormalsField_T >(normalsFieldID);
+      auto* flagField    = block.getData< FlagField_T >(flagFieldID);
+      auto boundaryFlag  = flagField->getFlag(boundaryFlagUID);
+      auto domainFlag    = flagField->getFlag(domainFlagUID);
+
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(normalsField,
+
+         if( x < blockLocalCellBB.xMax() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x + 1, y, z) == domainFlag)
+               normalsField->get(x, y, z, 0) = 1;
+         }
+
+         if( x > blockLocalCellBB.xMin() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x - 1, y, z) == domainFlag)
+               normalsField->get(x, y, z, 0) = - 1;
+         }
+
+         if( y < blockLocalCellBB.yMax() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y + 1, z) == domainFlag)
+               normalsField->get(x, y, z, 1) = 1;
+         }
+
+         if( y > blockLocalCellBB.yMin() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y - 1, z) == domainFlag)
+               normalsField->get(x, y, z, 1) = - 1;
+         }
+
+         if( z < blockLocalCellBB.zMax() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y, z + 1) == domainFlag)
+               normalsField->get(x, y, z, 2) = 1;
+         }
+
+         if( z > blockLocalCellBB.zMin() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y, z - 1) == domainFlag)
+               normalsField->get(x, y, z, 2) = - 1;
+         }
+
+      )
+      // clang-format on
+   }
+}
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/CalculateNormals.h b/apps/showcases/PhaseFieldAllenCahn/CPU/CalculateNormals.h
new file mode 100644
index 0000000000000000000000000000000000000000..901db8544732cea2953e651fc10c78943326bf56
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/CalculateNormals.h
@@ -0,0 +1,32 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CalculateNormals.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagField.h"
+
+namespace walberla
+{
+void calculate_normals(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID normalsFieldID,
+                       ConstBlockDataID flagFieldID, FlagUID fluidFlagUID, FlagUID boundaryFlagUID);
+}
\ No newline at end of file
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/InitializerFunctions.cpp b/apps/showcases/PhaseFieldAllenCahn/CPU/InitializerFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0fd64cb73cefcfefb25a565fee9c80b4c127b155
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/InitializerFunctions.cpp
@@ -0,0 +1,76 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+#include "core/math/Random.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+
+namespace walberla
+{
+using PhaseField_T = GhostLayerField< real_t, 1 >;
+using FlagField_T  = FlagField< uint8_t >;
+
+void initPhaseField_sphere(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                           const real_t R                         = 10,
+                           const Vector3< real_t > bubbleMidPoint = Vector3< real_t >(0.0, 0.0, 0.0),
+                           const bool bubble = true, const real_t W = 5)
+{
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< PhaseField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t Ri = sqrt((globalCell[0] - bubbleMidPoint[0]) * (globalCell[0] - bubbleMidPoint[0]) +
+                          (globalCell[1] - bubbleMidPoint[1]) * (globalCell[1] - bubbleMidPoint[1]) +
+                          (globalCell[2] - bubbleMidPoint[2]) * (globalCell[2] - bubbleMidPoint[2]));
+         if (bubble) phaseField->get(x, y, z) = 0.5 + 0.5 * tanh(2.0 * (Ri - R) / W);
+         else phaseField->get(x, y, z)        = 0.5 - 0.5 * tanh(2.0 * (Ri - R) / W);
+      )
+      // clang-format on
+   }
+}
+
+void initPhaseField_RTI(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                        const real_t W = 5)
+{
+   auto X              = blocks->getDomainCellBB().xMax();
+   auto halfY          = (blocks->getDomainCellBB().yMax()) / 2.0;
+   double perturbation = 0.05;
+
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< PhaseField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t tmp =
+         perturbation * X * (cos((2.0 * math::pi * globalCell[0]) / X) + cos((2.0 * math::pi * globalCell[2]) / X));
+         phaseField->get(x, y, z) = 0.5 + 0.5 * tanh(((globalCell[1] - halfY) - tmp) / (W / 2.0));
+      )
+      // clang-format on
+   }
+}
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/InitializerFunctions.h b/apps/showcases/PhaseFieldAllenCahn/CPU/InitializerFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..04bac53f2e75d17ddd393da0e247b63df05d0704
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/InitializerFunctions.h
@@ -0,0 +1,39 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+#pragma once
+
+namespace walberla
+{
+void initPhaseField_sphere(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t R,
+                           Vector3< real_t > bubbleMidPoint, bool bubble = true, real_t W = 5);
+
+void initPhaseField_RTI(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t W = 5);
+
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/PythonExports.cpp b/apps/showcases/PhaseFieldAllenCahn/CPU/PythonExports.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..40bb8c52bf9a4b320b3f5300c0714ef8bda06e43
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/PythonExports.cpp
@@ -0,0 +1,95 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PythonExports.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "waLBerlaDefinitions.h"
+#ifdef WALBERLA_BUILD_WITH_PYTHON
+
+#include "python_coupling/Manager.h"
+
+#include "core/math/Constants.h"
+
+#include "blockforest/python/Exports.h"
+
+#include "field/communication/PackInfo.h"
+#include "field/python/Exports.h"
+
+#include "geometry/python/Exports.h"
+
+#include "postprocessing/python/Exports.h"
+
+#include "timeloop/python/Exports.h"
+
+
+namespace walberla {
+    using flag_t = uint8_t;
+    // clang-format off
+    void exportDataStructuresToPython() {
+
+        namespace bmpl = boost::mpl;
+
+        auto pythonManager = python_coupling::Manager::instance();
+
+        typedef bmpl::vector<
+                Field<walberla::real_t, 1>,
+                Field<walberla::real_t, 3>,
+                Field<walberla::real_t, 9>,
+                Field<walberla::real_t, 19>,
+                Field<walberla::real_t, 27>>
+                FieldTypes;
+
+        typedef bmpl::vector<stencil::D2Q9,
+                stencil::D3Q19,
+                stencil::D3Q27>
+                Stencils;
+
+        typedef bmpl::vector<
+                GhostLayerField<real_t,1>,
+                GhostLayerField<real_t,3>>
+                RealFieldTypes;
+
+        typedef bmpl::vector<
+                FlagField<flag_t>>
+                FlagFieldTypes;
+        // Field
+        pythonManager->addExporterFunction(field::exportModuleToPython<FieldTypes>);
+        pythonManager->addExporterFunction(field::exportGatherFunctions<FieldTypes>);
+        pythonManager->addBlockDataConversion<FieldTypes>();
+
+        // Blockforest
+        pythonManager->addExporterFunction(blockforest::exportModuleToPython<Stencils>);
+
+        // Timeloop
+        pythonManager->addExporterFunction(timeloop::exportModuleToPython);
+
+        // Postprocessing
+        pythonManager->addExporterFunction( postprocessing::exportModuleToPython<RealFieldTypes, FlagFieldTypes> );
+
+        // Geometry
+        pythonManager->addExporterFunction( geometry::exportModuleToPython );
+    }
+   // clang-format on
+}
+
+#else
+
+namespace walberla {
+   void exportDataStructuresToPython() {}
+} // namespace walberla
+
+#endif
\ No newline at end of file
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/PythonExports.h b/apps/showcases/PhaseFieldAllenCahn/CPU/PythonExports.h
new file mode 100644
index 0000000000000000000000000000000000000000..df5ff959a99e37be3a48f84d65e123f716126f2e
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/PythonExports.h
@@ -0,0 +1,26 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PythonExports.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+namespace walberla
+{
+void exportDataStructuresToPython();
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/contact.cpp b/apps/showcases/PhaseFieldAllenCahn/CPU/contact.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e87ab28f4c6e5c011d2e78dda90efc08f0e6c94
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/contact.cpp
@@ -0,0 +1,116 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file contact.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "contact.h"
+
+#include "core/DataTypes.h"
+
+#include <cmath>
+
+#define FUNC_PREFIX
+
+namespace walberla
+{
+namespace lbm
+{
+#ifdef __GNUC__
+#   pragma GCC diagnostic push
+#endif
+
+namespace internal_boundary_contact
+{
+static FUNC_PREFIX void contact_angle_treatment(uint8_t* RESTRICT const _data_indexVector, double* RESTRICT _data_phase,
+                                                int64_t const _stride_phase_0, int64_t const _stride_phase_1,
+                                                int64_t const _stride_phase_2, int64_t indexVectorSize, double alpha)
+{
+   for (int ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
+   {
+      const int32_t x = *((int32_t*) (&_data_indexVector[24 * ctr_0]));
+      const int32_t y = *((int32_t*) (&_data_indexVector[24 * ctr_0 + 4]));
+      const int32_t z = *((int32_t*) (&_data_indexVector[24 * ctr_0 + 8]));
+
+      const int32_t nx = *((int32_t*) (&_data_indexVector[24 * ctr_0 + 12]));
+      const int32_t ny = *((int32_t*) (&_data_indexVector[24 * ctr_0 + 16]));
+      const int32_t nz = *((int32_t*) (&_data_indexVector[24 * ctr_0 + 20]));
+
+      const int32_t x1 = x + nx;
+      const int32_t y1 = y + ny;
+      const int32_t z1 = z + nz;
+
+      const double h = 0.5 * sqrt((float) (nx * nx + ny * ny + nz * nz));
+      const double a = cos(alpha);
+      const double W = 5;
+
+      double* RESTRICT _phase_wall     = _data_phase + _stride_phase_1 * y + _stride_phase_2 * z;
+      double* RESTRICT _phase_interior = _data_phase + _stride_phase_1 * y1 + _stride_phase_2 * z1;
+      if (h < 0.001) { _phase_wall[_stride_phase_0 * x] = 1.0; }
+      else if (a > 1e-8 || a < -1e-8)
+      {
+         const double var = -h * (4.0 / W) * a;
+         _phase_wall[_stride_phase_0 * x] =
+            (1 + var - sqrt((1 + var) * (1 + var) - 4 * var * _phase_interior[_stride_phase_0 * x1])) / (var + 1e-12) -
+            _phase_interior[_stride_phase_0 * x1];
+      }
+      else
+      {
+         _phase_wall[_stride_phase_0 * x] = _phase_interior[_stride_phase_0 * x1];
+      }
+   }
+}
+} // namespace internal_boundary_contact
+
+#ifdef __GNUC__
+#   pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#   pragma pop
+#endif
+
+void contact::run(IBlock* block, IndexVectors::Type type)
+{
+   auto* indexVectors      = block->getData< IndexVectors >(indexVectorID);
+   int64_t indexVectorSize = int64_c(indexVectors->indexVector(type).size());
+   if (indexVectorSize == 0) return;
+
+   auto pointer = indexVectors->pointerCpu(type);
+
+   auto* _data_indexVector = reinterpret_cast< uint8_t* >(pointer);
+
+   auto phaseField = block->getData< field::GhostLayerField< double, 1 > >(phaseFieldID);
+   auto& alpha     = this->alpha_;
+
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(phaseField->nrOfGhostLayers()))
+   double* RESTRICT _data_phase = phaseField->dataAt(0, 0, 0, 0);
+   const auto _stride_pdfs_0    = int64_t(phaseField->xStride());
+   const auto _stride_pdfs_1    = int64_t(phaseField->yStride());
+   const auto _stride_pdfs_2    = int64_t(phaseField->zStride());
+   internal_boundary_contact::contact_angle_treatment(_data_indexVector, _data_phase, _stride_pdfs_0, _stride_pdfs_1,
+                                                      _stride_pdfs_2, indexVectorSize, alpha);
+}
+
+void contact::operator()(IBlock* block) { run(block, IndexVectors::ALL); }
+
+void contact::inner(IBlock* block) { run(block, IndexVectors::INNER); }
+
+void contact::outer(IBlock* block) { run(block, IndexVectors::OUTER); }
+
+} // namespace lbm
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/contact.h b/apps/showcases/PhaseFieldAllenCahn/CPU/contact.h
new file mode 100644
index 0000000000000000000000000000000000000000..c534195169519a8634fad6fd0dc00b836b1b0634
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/contact.h
@@ -0,0 +1,140 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file contact.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/DataTypes.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagField.h"
+#include "field/GhostLayerField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#   define RESTRICT __restrict__
+#elif _MSC_VER
+#   define RESTRICT __restrict
+#else
+#   define RESTRICT
+#endif
+
+namespace walberla
+{
+namespace lbm
+{
+class contact
+{
+ public:
+   struct IndexInfo
+   {
+      int32_t x1;
+      int32_t y1;
+      int32_t z1;
+      int32_t x2;
+      int32_t y2;
+      int32_t z2;
+      IndexInfo(int32_t x1_, int32_t y1_, int32_t z1_, int32_t x2_, int32_t y2_, int32_t z2_)
+         : x1(x1_), y1(y1_), z1(z1_), x2(x2_), y2(y2_), z2(z2_)
+      {}
+      bool operator==(const IndexInfo& o) const
+      {
+         return x1 == o.x1 && y1 == o.y1 && z1 == o.z1 && x2 == o.x2 && y2 == o.y2 && z2 == o.z2;
+      }
+   };
+
+   class IndexVectors
+   {
+    public:
+      using CpuIndexVector = std::vector< IndexInfo >;
+
+      enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+      IndexVectors() : cpuVectors_(NUM_TYPES) {}
+      bool operator==(IndexVectors& other) { return other.cpuVectors_ == cpuVectors_; }
+
+      CpuIndexVector& indexVector(Type t) { return cpuVectors_[t]; }
+      IndexInfo* pointerCpu(Type t) { return &(cpuVectors_[t][0]); }
+
+    private:
+      std::vector< CpuIndexVector > cpuVectors_;
+   };
+
+   contact(const shared_ptr< StructuredBlockForest >& blocks, BlockDataID phaseFieldID_, double alpha)
+      : phaseFieldID(phaseFieldID_), alpha_(alpha)
+   {
+      auto createIdxVector = [](IBlock* const, StructuredBlockStorage* const) { return new IndexVectors(); };
+      indexVectorID = blocks->addStructuredBlockData< IndexVectors >(createIdxVector, "IndexField_contact_angle");
+   };
+
+   void operator()(IBlock* block);
+   void inner(IBlock* block);
+   void outer(IBlock* block);
+
+   template< typename NormalField_T >
+   void fillFromNormalField(const shared_ptr< StructuredBlockForest >& blocks, ConstBlockDataID normalFieldID)
+   {
+      for (auto& block : *blocks)
+      {
+         auto* indexVectors     = block.getData< IndexVectors >(indexVectorID);
+         auto& indexVectorAll   = indexVectors->indexVector(IndexVectors::ALL);
+         auto& indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+         auto& indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+         auto* normalField = block.getData< NormalField_T >(normalFieldID);
+
+         auto inner = normalField->xyzSize();
+         inner.expand(cell_idx_t(-1));
+
+         indexVectorAll.clear();
+         indexVectorInner.clear();
+         indexVectorOuter.clear();
+         // clang-format off
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(normalField, Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+            if(normalField->get(x, y, z, 0) != 0 || normalField->get(x, y, z, 1) != 0 || normalField->get(x, y, z, 2) != 0)
+               {
+                  auto element = IndexInfo(x, y,  z, normalField->get(x, y, z, 0), normalField->get(x, y, z, 1), normalField->get(x, y, z, 2));
+                  indexVectorAll.push_back( element );
+                  if( inner.contains( x, y, z ) )
+                      indexVectorInner.push_back( element );
+                  else
+                     indexVectorOuter.push_back( element );
+               }
+         )
+         // clang-format on
+      }
+   }
+
+ private:
+   void run(IBlock* block, IndexVectors::Type type);
+
+   BlockDataID indexVectorID;
+
+ public:
+   BlockDataID phaseFieldID;
+   double alpha_;
+};
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/droplet_contact_angle.py b/apps/showcases/PhaseFieldAllenCahn/CPU/droplet_contact_angle.py
new file mode 100755
index 0000000000000000000000000000000000000000..68229ece6fda45f0467a6277548ac23515054a8b
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/droplet_contact_angle.py
@@ -0,0 +1,78 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 1000
+
+        # simulation parameters
+        self.timesteps = 5001
+        self.cells = (32, 64, 32)
+        self.blocks = (4, 1, 4)
+        self.periodic = (0, 0, 0)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.overlappingWidth = (8, 1, 1)
+        self.timeStepStrategy = 'normal'
+
+        self.contactAngle = 22
+
+        # bubble parameters
+        self.dropletRadius = 24.0
+        self.dropletMidPoint = (64, 24, 64)
+
+        # everything else
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+
+        self.counter = 0
+        self.yPositions = []
+
+    @wlb.member_callback
+    def config(self, **kwargs):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'remainingTimeLoggerFrequency': 10.0,
+                'scenario': self.scenario,
+                'contactAngle': self.contactAngle
+            },
+            'PhysicalParameters': {
+                'density_liquid': 1.0,
+                'density_gas': 0.001,
+                'surface_tension': 5e-5,
+                'mobility': 0.05,
+                'gravitational_acceleration': 0.0,
+                'relaxation_time_liquid': 3 * 0.166,
+                'relaxation_time_gas': 3 * 0.0166,
+            },
+            'Boundaries': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.dropletMidPoint,
+                'bubbleRadius': self.dropletRadius,
+                'bubble': False
+            },
+        }
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase.cpp b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cf1eff4d3d60b7d43f11d8070dcd7e00c4a5bca0
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase.cpp
@@ -0,0 +1,338 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file multiphase.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/python/Exports.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/PythonCallback.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "CalculateNormals.h"
+#include "InitializerFunctions.h"
+#include "PythonExports.h"
+#include "contact.h"
+
+//////////////////////////////
+// INCLUDE GENERATED FILES //
+////////////////////////////
+
+#include "GenDefines.h"
+#include "hydro_LB_NoSlip.h"
+#include "hydro_LB_step.h"
+#include "initialize_phase_field_distributions.h"
+#include "initialize_velocity_based_distributions.h"
+#include "phase_field_LB_NoSlip.h"
+#include "phase_field_LB_step.h"
+#include "stream_hydro.h"
+
+////////////
+// USING //
+//////////
+
+using namespace walberla;
+
+using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >;
+using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >;
+using VelocityField_T  = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
+using NormalsField_T   = GhostLayerField< int8_t, Stencil_hydro_T::Dimension >;
+using PhaseField_T     = GhostLayerField< real_t, 1 >;
+using flag_t           = walberla::uint8_t;
+using FlagField_T      = FlagField< flag_t >;
+
+int main(int argc, char** argv)
+{
+   mpi::Environment Env(argc, argv);
+   exportDataStructuresToPython();
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      WALBERLA_MPI_WORLD_BARRIER()
+
+      auto config = *cfg;
+      logging::configureLogging(config);
+      shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config);
+
+      ////////////////////////////
+      // ADD DOMAIN PARAMETERS //
+      //////////////////////////
+
+      auto domainSetup                = config->getOneBlock("DomainSetup");
+      Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+
+      ////////////////////////////////////////
+      // ADD GENERAL SIMULATION PARAMETERS //
+      //////////////////////////////////////
+
+      auto parameters                    = config->getOneBlock("Parameters");
+      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
+      const uint_t timesteps             = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+      const real_t remainingTimeLoggerFrequency =
+         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
+      const uint_t scenario  = parameters.getParameter< uint_t >("scenario", uint_c(1));
+      const real_t alpha     = parameters.getParameter< real_t >("contactAngle", real_c(90));
+      const real_t alpha_rad = alpha * (math::pi / 180);
+      Vector3< int > overlappingWidth =
+         parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1));
+
+      /////////////////////////
+      // ADD DATA TO BLOCKS //
+      ///////////////////////
+
+      BlockDataID lb_phase_field =
+         field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_t(0), field::fzyx);
+      BlockDataID lb_velocity_field =
+         field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_t(0), field::fzyx);
+      BlockDataID vel_field   = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
+      BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx);
+
+      BlockDataID normals     = field::addToStorage< NormalsField_T >(blocks, "normals", int8_t(0), field::fzyx);
+      BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the phase-field")
+      if (scenario == 1)
+      {
+         auto bubbleParameters                  = config->getOneBlock("Bubble");
+         const Vector3< real_t > bubbleMidPoint = bubbleParameters.getParameter< Vector3< real_t > >("bubbleMidPoint");
+         const real_t bubbleRadius              = bubbleParameters.getParameter< real_t >("bubbleRadius", 20.0);
+         const bool bubble                      = bubbleParameters.getParameter< bool >("bubble", true);
+         initPhaseField_sphere(blocks, phase_field, bubbleRadius, bubbleMidPoint, bubble);
+      }
+      else if (scenario == 2)
+      {
+         initPhaseField_RTI(blocks, phase_field);
+      }
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the phase-field done")
+
+      /////////////////
+      // ADD SWEEPS //
+      ///////////////
+
+      auto physical_parameters     = config->getOneBlock("PhysicalParameters");
+      const real_t density_liquid  = physical_parameters.getParameter< real_t >("density_liquid", real_c(1.0));
+      const real_t density_gas     = physical_parameters.getParameter< real_t >("density_gas");
+      const real_t surface_tension = physical_parameters.getParameter< real_t >("surface_tension");
+      const real_t mobility        = physical_parameters.getParameter< real_t >("mobility");
+      const real_t gravitational_acceleration =
+         physical_parameters.getParameter< real_t >("gravitational_acceleration");
+      const real_t relaxation_time_liquid = physical_parameters.getParameter< real_t >("relaxation_time_liquid");
+      const real_t relaxation_time_gas    = physical_parameters.getParameter< real_t >("relaxation_time_gas");
+
+      pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
+      pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);
+
+      pystencils::phase_field_LB_step phase_field_LB_step(
+         lb_phase_field, phase_field, vel_field, mobility,
+         Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+      pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field, gravitational_acceleration,
+                                              density_liquid, density_gas, surface_tension, relaxation_time_liquid,
+                                              relaxation_time_gas,
+                                              Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+      pystencils::stream_hydro stream_hydro(lb_velocity_field,
+                                            Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+
+      ////////////////////////
+      // ADD COMMUNICATION //
+      //////////////////////
+
+      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field(blocks);
+      Comm_phase_field.addPackInfo(make_shared< field::communication::PackInfo< PhaseField_T > >(phase_field));
+
+      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
+      Comm_velocity_based_distributions.addPackInfo(
+         make_shared< field::communication::PackInfo< PdfField_hydro_T > >(lb_velocity_field));
+
+      blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
+      Comm_phase_field_distributions.addPackInfo(
+         make_shared< field::communication::PackInfo< PdfField_phase_T > >(lb_phase_field));
+
+      ////////////////////////
+      // BOUNDARY HANDLING //
+      //////////////////////
+
+      const FlagUID fluidFlagUID("Fluid");
+      const FlagUID wallFlagUID("NoSlip");
+
+      auto boundariesConfig = config->getBlock("Boundaries");
+      if (boundariesConfig)
+      {
+         geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
+         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
+      }
+
+      calculate_normals(blocks, normals, flagFieldID, fluidFlagUID, wallFlagUID);
+      lbm::phase_field_LB_NoSlip phase_field_LB_NoSlip(blocks, lb_phase_field);
+      lbm::hydro_LB_NoSlip hydro_LB_NoSlip(blocks, lb_velocity_field);
+      lbm::contact contact_angle(blocks, phase_field, alpha_rad);
+
+      phase_field_LB_NoSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);
+      hydro_LB_NoSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);
+      contact_angle.fillFromNormalField< NormalsField_T >(blocks, normals);
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the normals-field done")
+
+      ////////////////
+      // TIME LOOP //
+      //////////////
+
+      auto timeLoop       = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
+      auto normalTimeStep = [&]() {
+         for (auto& block : *blocks)
+         {
+            phase_field_LB_NoSlip(&block);
+            Comm_phase_field_distributions();
+
+            phase_field_LB_step(&block);
+
+            Comm_phase_field();
+            contact_angle(&block);
+
+            hydro_LB_step(&block);
+            hydro_LB_NoSlip(&block);
+            Comm_velocity_based_distributions();
+
+            stream_hydro(&block);
+         }
+      };
+      auto simpleOverlapTimeStep = [&]() {
+         for (auto& block : *blocks)
+            phase_field_LB_NoSlip(&block);
+
+         Comm_phase_field_distributions.startCommunication();
+         for (auto& block : *blocks)
+            phase_field_LB_step.inner(&block);
+         Comm_phase_field_distributions.wait();
+         for (auto& block : *blocks)
+            phase_field_LB_step.outer(&block);
+
+         Comm_phase_field.startCommunication();
+         for (auto& block : *blocks)
+            hydro_LB_step.inner(&block);
+         Comm_phase_field.wait();
+         for (auto& block : *blocks)
+            hydro_LB_step.outer(&block);
+
+         for (auto& block : *blocks)
+            hydro_LB_NoSlip(&block);
+
+         Comm_velocity_based_distributions.startCommunication();
+         for (auto& block : *blocks)
+            stream_hydro.inner(&block);
+         Comm_velocity_based_distributions.wait();
+         for (auto& block : *blocks)
+            stream_hydro.outer(&block);
+      };
+      std::function< void() > timeStep;
+      if (timeStepStrategy == "overlap")
+      {
+         timeStep = std::function< void() >(simpleOverlapTimeStep);
+         WALBERLA_LOG_INFO_ON_ROOT("overlapping timestep")
+      }
+      else
+      {
+         timeStep = std::function< void() >(normalTimeStep);
+         WALBERLA_LOG_INFO_ON_ROOT("normal timestep with no overlapping")
+      }
+
+      timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
+
+      if (scenario == 4)
+      {
+         python_coupling::PythonCallback smear_interface("interface_diffusion");
+         if (smear_interface.isCallable())
+         {
+            smear_interface.data().exposePtr("blocks", blocks);
+            smear_interface();
+         }
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs")
+      for (auto& block : *blocks)
+      {
+         init_h(&block);
+         init_g(&block);
+      }
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs done")
+      uint_t dbWriteFrequency = parameters.getParameter< uint_t >("dbWriteFrequency", 10000000);
+
+      timeLoop->addFuncAfterTimeStep(
+         [&]() {
+            if (timeLoop->getCurrentTimeStep() % dbWriteFrequency == 0)
+            {
+               python_coupling::PythonCallback callback("at_end_of_time_step");
+               if (callback.isCallable())
+               {
+                  callback.data().exposePtr("blocks", blocks);
+                  callback.data().exposePtr("time_loop", timeLoop);
+                  callback();
+               }
+            }
+         },
+         "Python callback");
+
+      // remaining time logger
+      timeLoop->addFuncAfterTimeStep(
+         timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+         "remaining time logger");
+
+      uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      if (vtkWriteFrequency > 0)
+      {
+         auto vtkOutput   = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                         "simulation_step", false, true, true, false, 0);
+         auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "PhaseField");
+         vtkOutput->addCellDataWriter(phaseWriter);
+
+         // auto normlasWriter = make_shared<field::VTKWriter<NormalsField_T>>(normals, "Normals");
+         // vtkOutput->addCellDataWriter(normlasWriter);
+
+         // auto velWriter = make_shared<field::VTKWriter<VelocityField_T>>(vel_field, "Velocity");
+         // vtkOutput->addCellDataWriter(velWriter);
+
+         timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+      WcTimer simTimer;
+      simTimer.start();
+      timeLoop->run();
+      simTimer.end();
+      WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+      auto time            = simTimer.last();
+      auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+      auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
+      WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess)
+      WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s")
+   }
+   return EXIT_SUCCESS;
+}
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_RTI_3D.py b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_RTI_3D.py
new file mode 100755
index 0000000000000000000000000000000000000000..1050b08e9d1c8d5825816828e03823667bc5a1f9
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_RTI_3D.py
@@ -0,0 +1,134 @@
+import waLBerla as wlb
+import waLBerla.tools.sqlitedb as wlbSqlite
+from waLBerla.core_extension import makeSlice
+
+import numpy as np
+from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_parameters_rti
+
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 1000
+        self.dbWriteFrequency = 200
+
+        # simulation parameters
+        self.timesteps = 6001
+
+        self.cells = (64, 32, 64)
+        self.blocks = (1, 8, 1)
+        self.periodic = (1, 0, 1)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        # physical parameters
+        self.density_heavy = 1.0
+        self.reference_time = 6000
+        self.parameters = calculate_parameters_rti(reference_length=128,
+                                                   reference_time=self.reference_time,
+                                                   density_heavy=self.density_heavy,
+                                                   capillary_number=9.1,
+                                                   reynolds_number=128,
+                                                   atwood_number=1.0,
+                                                   peclet_number=140,
+                                                   density_ratio=3,
+                                                   viscosity_ratio=3)
+
+        # everything else
+        self.dbFile = "RTI.db"
+
+        self.scenario = 2  # 1 rising bubble, 2 RTI
+
+        self.counter = 0
+        self.yPositions = []
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'useGui': 0,
+                'remainingTimeLoggerFrequency': 10.0,
+                'scenario': self.scenario,
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.density_heavy,
+                'density_gas': self.parameters["density_light"],
+                'surface_tension': self.parameters["surface_tension"],
+                'mobility': self.parameters.get("mobility", 0.1),
+                'gravitational_acceleration': self.parameters["gravitational_acceleration"],
+                'relaxation_time_liquid': self.parameters.get("relaxation_time_heavy"),
+                'relaxation_time_gas': self.parameters.get("relaxation_time_light"),
+            },
+            'Boundaries': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ]
+            },
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, time_loop):
+        t = time_loop.getCurrentTimeStep()
+        ny = self.size[1]
+        l0 = self.size[0]
+        if t % self.dbWriteFrequency == 0:
+            location_of_spike = -100
+            location_of_bubble = -100
+            location_of_saddle = -100
+            mass = -100
+            spike_data = wlb.field.gather(blocks, 'phase', makeSlice[self.size[0] // 2, :, self.size[2] // 2])
+            if spike_data:
+                spike_field = np.asarray(spike_data.buffer()).squeeze()
+                location_of_spike = (np.argmax(spike_field > 0.5) - ny//2)/l0
+
+            bubble_data = wlb.field.gather(blocks, 'phase', makeSlice[0, :, 0])
+            if bubble_data:
+                bubble_field = np.asarray(bubble_data.buffer()).squeeze()
+                location_of_bubble = (np.argmax(bubble_field > 0.5) - ny//2)/l0
+
+            saddle_data = wlb.field.gather(blocks, 'phase', makeSlice[0, :, self.size[2] // 2])
+            if saddle_data:
+                saddle_field = np.asarray(saddle_data.buffer()).squeeze()
+                location_of_saddle = (np.argmax(saddle_field > 0.5) - ny//2)/l0
+
+            phase = wlb.field.gather(blocks, 'phase', makeSlice[:, :, :])
+            if phase:
+                phase_field = np.asarray(phase.buffer()).squeeze()
+                mass = np.sum(phase_field)
+
+            self.write_result_to_database(t, location_of_spike, location_of_bubble, location_of_saddle, mass)
+
+    def write_result_to_database(self, t, spike, bubble, saddle, mass):
+        """Writes the simulation result stored in the global variables shapeFactors and angles to
+               an sqlite3 database, and resets the global variables."""
+        result = {'waLBerlaVersion': wlb.build_info.version,
+                  'xCells': self.size[0],
+                  'yCells': self.size[1],
+                  'zCells': self.size[2],
+                  'spike': spike,
+                  'bubble': bubble,
+                  'saddle': saddle,
+                  'mass': mass,
+                  'timesteps': t,
+                  'normalized_time': t / self.reference_time,
+                  'processes': wlb.mpi.numProcesses(),
+                  }
+        try:
+            wlbSqlite.checkAndUpdateSchema(result, 'interface_location', self.dbFile)
+            wlbSqlite.storeSingle(result, 'interface_location', self.dbFile)
+        except Exception as e:
+            wlb.log_warning("Failed to store run in database " + str(e) + "\n" + str(result))
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py
new file mode 100644
index 0000000000000000000000000000000000000000..9010cdcf8adc60a5d2b9082e3f635a444ff8633b
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_codegen.py
@@ -0,0 +1,189 @@
+from pystencils import fields, TypedSymbol
+from pystencils.simp import sympy_cse
+from pystencils import AssignmentCollection
+
+from lbmpy.boundaries import NoSlip
+from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
+from lbmpy.stencils import get_stencil
+
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
+from lbmpy_walberla import generate_boundary
+
+from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
+    initializer_kernel_hydro_lb, interface_tracking_force, \
+    hydrodynamic_force, get_collision_assignments_hydro
+
+from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
+
+import numpy as np
+import sympy as sp
+
+stencil_phase = get_stencil("D3Q19")
+stencil_hydro = get_stencil("D3Q27")
+q_phase = len(stencil_phase)
+q_hydro = len(stencil_hydro)
+
+assert (len(stencil_phase[0]) == len(stencil_hydro[0]))
+dimensions = len(stencil_hydro[0])
+
+########################
+# PARAMETER DEFINITION #
+########################
+
+density_liquid = sp.Symbol("rho_H")
+density_gas = sp.Symbol("rho_L")
+
+surface_tension = sp.Symbol("sigma")
+mobility = sp.Symbol("mobility")
+
+gravitational_acceleration = sp.Symbol("gravity")
+
+relaxation_time_liquid = sp.Symbol("tau_H")
+relaxation_time_gas = sp.Symbol("tau_L")
+
+# phase-field parameter
+drho3 = (density_liquid - density_gas) / 3
+# interface thickness
+W = 5
+# coefficients related to surface tension
+beta = 12.0 * (surface_tension / W)
+kappa = 1.5 * surface_tension * W
+
+########################
+# FIELDS #
+########################
+
+# velocity field
+u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
+# phase-field
+C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
+
+flag = fields(f"flag_field: uint8[{dimensions}D]", layout='fzyx')
+# phase-field distribution functions
+h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
+h_tmp = fields(f"lb_phase_field_tmp({q_phase}): [{dimensions}D]", layout='fzyx')
+# hydrodynamic distribution functions
+g = fields(f"lb_velocity_field({q_hydro}): [{dimensions}D]", layout='fzyx')
+g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): [{dimensions}D]", layout='fzyx')
+
+########################################
+# RELAXATION RATES AND EXTERNAL FORCES #
+########################################
+
+# relaxation rate for interface tracking LB step
+relaxation_rate_allen_cahn = 1.0 / (0.5 + (3.0 * mobility))
+# calculate the relaxation rate for hydrodynamic LB step
+density = density_gas + C.center * (density_liquid - density_gas)
+# force acting on all phases of the model
+body_force = np.array([0, gravitational_acceleration * density, 0])
+# calculation of the relaxation time via viscosities
+# viscosity = viscosity_gas * viscosity_liquid + C.center\
+#             * (density_liquid*viscosity_liquid - viscosity_liquid*viscosity_gas)
+# relaxation_time = 3 * viscosity / density + 0.5
+
+relaxation_time = 0.5 + relaxation_time_gas + C.center * (relaxation_time_liquid - relaxation_time_gas)
+# calculate the relaxation time if the phase-field has values over one
+# relaxation_rate = 1.0 / relaxation_time
+relaxation_rate = sp.Symbol("s8")
+relaxation_rate_cutoff = sp.Piecewise((1 / (0.5 + relaxation_time_liquid), C.center > 0.999),   # True value
+                                      (1 / relaxation_time, True))                              # Else value
+
+###############
+# LBM METHODS #
+###############
+
+method_phase = create_lb_method(stencil=stencil_phase, method='srt',
+                                relaxation_rate=relaxation_rate_allen_cahn, compressible=True)
+
+method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True,
+                                relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1])
+
+# create the kernels for the initialization of the g and h field
+h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W, fd_stencil=get_stencil("D3Q27"))
+g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
+
+force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
+force_model_h = MultiphaseForceModel(force=force_h)
+
+force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force,
+                             fd_stencil=get_stencil("D3Q27"))
+
+####################
+# LBM UPDATE RULES #
+####################
+
+h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)]
+sum_h = np.sum(h_tmp_symbol_list[:])
+
+method_phase.set_force_model(force_model_h)
+
+phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
+                                            velocity_input=u,
+                                            compressible=True,
+                                            optimization={"symbolic_field": h,
+                                                          "symbolic_temporary_field": h_tmp},
+                                            kernel_type='stream_pull_collide')
+
+phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
+phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
+                                           subexpressions=phase_field_LB_step.subexpressions)
+phase_field_LB_step = sympy_cse(phase_field_LB_step)
+
+# ---------------------------------------------------------------------------------------------------------
+hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
+                                                density=density,
+                                                velocity_input=u,
+                                                force=force_g,
+                                                optimization={"symbolic_field": g,
+                                                              "symbolic_temporary_field": g_tmp},
+                                                kernel_type='collide_only')
+
+hydro_LB_step.set_sub_expressions_from_dict({**{relaxation_rate: relaxation_rate_cutoff},
+                                             **hydro_LB_step.subexpressions_dict})
+
+stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
+                                     optimization={"symbolic_field": g,
+                                                   "symbolic_temporary_field": g_tmp},
+                                     kernel_type='stream_pull_only')
+
+###################
+# GENERATE SWEEPS #
+###################
+cpu_vec = {'instruction_set': 'sse', 'assume_inner_stride_one': True, 'nontemporal': True}
+
+vp = [('int32_t', 'cudaBlockSize0'),
+      ('int32_t', 'cudaBlockSize1')]
+
+info_header = f"""
+#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase}; 
+#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro};
+"""
+
+with CodeGeneration() as ctx:
+    if not ctx.optimize_for_localhost:
+        cpu_vec = {'instruction_set': None}
+
+    generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates)
+    generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
+
+    generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
+                   field_swaps=[(h, h_tmp)],
+                   inner_outer_split=True,
+                   cpu_vectorize_info=cpu_vec)
+    generate_boundary(ctx, 'phase_field_LB_NoSlip', NoSlip(), method_phase)
+
+    generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
+                   inner_outer_split=True,
+                   cpu_vectorize_info=cpu_vec)
+    generate_boundary(ctx, 'hydro_LB_NoSlip', NoSlip(), method_hydro)
+
+    generate_sweep(ctx, 'stream_hydro', stream_hydro,
+                   field_swaps=[(g, g_tmp)],
+                   inner_outer_split=True,
+                   cpu_vectorize_info=cpu_vec)
+
+    ctx.write_file("GenDefines.h", info_header)
+
+    # TODO: generate communication (PackInfo)
+
+print("finished code generation successfully")
diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_rising_bubble.py b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_rising_bubble.py
new file mode 100755
index 0000000000000000000000000000000000000000..e1ca7bddab6ee2b0d148ebf784ad448bf7bcee14
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/CPU/multiphase_rising_bubble.py
@@ -0,0 +1,137 @@
+import waLBerla as wlb
+import waLBerla.tools.sqlitedb as wlbSqlite
+import numpy as np
+
+from waLBerla.core_extension import makeSlice
+from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_dimensionless_rising_bubble
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 1000
+        self.dbWriteFrequency = 200
+
+        # simulation parameters
+        self.timesteps = 10000
+        self.cells = (64, 32, 64)
+        self.blocks = (1, 4, 1)
+        self.periodic = (0, 0, 0)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.overlappingWidth = (8, 1, 1)
+        self.timeStepStrategy = 'normal'
+
+        # physical parameters
+        self.bubbleRadius = 16
+        self.bubbleMidPoint = (self.size[0] / 2, self.bubbleRadius + 10, self.size[2] / 2)
+
+        # physical parameters
+        self.density_heavy = 1.0
+        self.reference_time = 18000
+        self.parameters = calculate_dimensionless_rising_bubble(reference_time=self.reference_time,
+                                                                density_heavy=self.density_heavy,
+                                                                bubble_radius=self.bubbleRadius,
+                                                                bond_number=1,
+                                                                reynolds_number=40,
+                                                                density_ratio=1000,
+                                                                viscosity_ratio=100)
+
+        # everything else
+        self.dbFile = "risingBubble3D.db"
+
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+
+        self.counter = 0
+        self.yPositions = []
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'remainingTimeLoggerFrequency': 10.0,
+                'scenario': self.scenario,
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.density_heavy,
+                'density_gas': self.parameters["density_light"],
+                'surface_tension': self.parameters["surface_tension"],
+                'mobility': self.parameters.get("mobility", 0.1),
+                'gravitational_acceleration': self.parameters["gravitational_acceleration"],
+                'relaxation_time_liquid': self.parameters.get("relaxation_time_heavy"),
+                'relaxation_time_gas': self.parameters.get("relaxation_time_light"),
+            },
+            'Boundaries': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ]
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.bubbleMidPoint,
+                'bubbleRadius': self.bubbleRadius,
+                'bubble': True
+            },
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, time_loop):
+        t = time_loop.getCurrentTimeStep()
+        if t % self.dbWriteFrequency == 0:
+            wlb_field = wlb.field.gather(blocks, 'phase', makeSlice[:, :, self.size[2] // 2])
+            if wlb_field:
+                phase_field = np.asarray(wlb_field.buffer()).squeeze()
+
+                location_of_gas = np.where(phase_field < 0.5)
+                cov = np.cov(location_of_gas)
+                eig = np.linalg.eig(cov)
+                axis_of_the_bubble = np.sqrt(eig[0])
+
+                center_of_mass = np.mean(location_of_gas, axis=1)
+                self.yPositions.append(center_of_mass[1])
+                if len(self.yPositions) > 1:
+                    speed = self.yPositions[-1] - self.yPositions[-2]
+                    self.write_result_to_database(t, speed, axis_of_the_bubble, center_of_mass)
+
+                self.counter += 1
+
+    def write_result_to_database(self, t, speed, axis_of_the_bubble, center_of_mass):
+        """Writes the simulation result stored in the global variables shapeFactors and angles to
+               an sqlite3 database, and resets the global variables."""
+        result = {'waLBerlaVersion': wlb.build_info.version,
+                  'xCells': self.size[0],
+                  'yCells': self.size[1],
+                  'zCells': self.size[2],
+                  'bubbleDiameter': self.bubbleRadius * 2.0,
+                  'bubble_axis_x': axis_of_the_bubble[0],
+                  'bubble_axis_y': axis_of_the_bubble[1],
+                  'center_of_mass_x': center_of_mass[0],
+                  'center_of_mass_y': center_of_mass[1],
+                  'rising_speed': speed,
+                  'timesteps': t,
+                  'processes': wlb.mpi.numProcesses(),
+                  }
+        try:
+            wlbSqlite.checkAndUpdateSchema(result, 'data', self.dbFile)
+            wlbSqlite.storeSingle(result, 'data', self.dbFile)
+        except Exception as e:
+            wlb.log_warning("Failed to store run in database " + str(e) + "\n" + str(result))
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e2982ff9355663358789d257de9d679c3264e098
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt
@@ -0,0 +1,24 @@
+waLBerla_link_files_to_builddir(*.prm)
+waLBerla_link_files_to_builddir(*.py)
+waLBerla_link_files_to_builddir(*.obj)
+
+waLBerla_generate_target_from_python(NAME PhaseFieldCodeGenGPU
+        FILE multiphase_codegen.py
+        OUT_FILES initialize_phase_field_distributions.cu initialize_phase_field_distributions.h
+        initialize_velocity_based_distributions.cu initialize_velocity_based_distributions.h
+        phase_field_LB_step.cu phase_field_LB_step.h
+        phase_field_LB_NoSlip.cu phase_field_LB_NoSlip.h
+        hydro_LB_step.cu hydro_LB_step.h
+        hydro_LB_NoSlip.cu hydro_LB_NoSlip.h
+        stream_hydro.cu stream_hydro.h
+        PackInfo_phase_field_distributions.cu PackInfo_phase_field_distributions.h
+        PackInfo_phase_field.cu PackInfo_phase_field.h
+        PackInfo_velocity_based_distributions.cu PackInfo_velocity_based_distributions.h
+        GenDefines.h)
+
+waLBerla_add_executable(NAME multiphase
+        FILES multiphase.cpp PythonExports.cpp InitializerFunctions.cpp CalculateNormals.cpp contact.cu multiphase_codegen.py
+        DEPENDS blockforest core cuda field postprocessing lbm geometry timeloop gui PhaseFieldCodeGenGPU)
+
+
+
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/CalculateNormals.cpp b/apps/showcases/PhaseFieldAllenCahn/GPU/CalculateNormals.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e20053e38130262fdf2bb8c556f1fd665888a945
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/CalculateNormals.cpp
@@ -0,0 +1,83 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CalculateNormals.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "CalculateNormals.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+
+#include "field/FlagField.h"
+
+namespace walberla
+{
+using FlagField_T    = FlagField< uint8_t >;
+using NormalsField_T = GhostLayerField< int8_t, 3 >;
+
+void calculate_normals(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID normalsFieldID,
+                       ConstBlockDataID flagFieldID, FlagUID domainFlagUID, FlagUID boundaryFlagUID)
+{
+   for (auto& block : *blocks)
+   {
+      CellInterval globalCellBB = blocks->getBlockCellBB(block);
+      CellInterval blockLocalCellBB;
+      blocks->transformGlobalToBlockLocalCellInterval(blockLocalCellBB, block, globalCellBB);
+
+      auto* normalsField = block.getData< NormalsField_T >(normalsFieldID);
+      auto* flagField    = block.getData< FlagField_T >(flagFieldID);
+      auto boundaryFlag  = flagField->getFlag(boundaryFlagUID);
+      auto domainFlag    = flagField->getFlag(domainFlagUID);
+
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(normalsField,
+
+         if( x < blockLocalCellBB.xMax() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x + 1, y, z) == domainFlag)
+               normalsField->get(x, y, z, 0) = 1;
+         }
+
+         if( x > blockLocalCellBB.xMin() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x - 1, y, z) == domainFlag)
+               normalsField->get(x, y, z, 0) = - 1;
+         }
+
+         if( y < blockLocalCellBB.yMax() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y + 1, z) == domainFlag)
+               normalsField->get(x, y, z, 1) = 1;
+         }
+
+         if( y > blockLocalCellBB.yMin() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y - 1, z) == domainFlag)
+               normalsField->get(x, y, z, 1) = - 1;
+         }
+
+         if( z < blockLocalCellBB.zMax() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y, z + 1) == domainFlag)
+               normalsField->get(x, y, z, 2) = 1;
+         }
+
+         if( z > blockLocalCellBB.zMin() ){
+            if(flagField->get(x, y, z) == boundaryFlag && flagField->get(x, y, z - 1) == domainFlag)
+               normalsField->get(x, y, z, 2) = - 1;
+         }
+
+      )
+      // clang-format on
+   }
+}
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/CalculateNormals.h b/apps/showcases/PhaseFieldAllenCahn/GPU/CalculateNormals.h
new file mode 100644
index 0000000000000000000000000000000000000000..901db8544732cea2953e651fc10c78943326bf56
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/CalculateNormals.h
@@ -0,0 +1,32 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CalculateNormals.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagField.h"
+
+namespace walberla
+{
+void calculate_normals(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID normalsFieldID,
+                       ConstBlockDataID flagFieldID, FlagUID fluidFlagUID, FlagUID boundaryFlagUID);
+}
\ No newline at end of file
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/InitializerFunctions.cpp b/apps/showcases/PhaseFieldAllenCahn/GPU/InitializerFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0fd64cb73cefcfefb25a565fee9c80b4c127b155
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/InitializerFunctions.cpp
@@ -0,0 +1,76 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+#include "core/math/Random.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+
+namespace walberla
+{
+using PhaseField_T = GhostLayerField< real_t, 1 >;
+using FlagField_T  = FlagField< uint8_t >;
+
+void initPhaseField_sphere(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                           const real_t R                         = 10,
+                           const Vector3< real_t > bubbleMidPoint = Vector3< real_t >(0.0, 0.0, 0.0),
+                           const bool bubble = true, const real_t W = 5)
+{
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< PhaseField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t Ri = sqrt((globalCell[0] - bubbleMidPoint[0]) * (globalCell[0] - bubbleMidPoint[0]) +
+                          (globalCell[1] - bubbleMidPoint[1]) * (globalCell[1] - bubbleMidPoint[1]) +
+                          (globalCell[2] - bubbleMidPoint[2]) * (globalCell[2] - bubbleMidPoint[2]));
+         if (bubble) phaseField->get(x, y, z) = 0.5 + 0.5 * tanh(2.0 * (Ri - R) / W);
+         else phaseField->get(x, y, z)        = 0.5 - 0.5 * tanh(2.0 * (Ri - R) / W);
+      )
+      // clang-format on
+   }
+}
+
+void initPhaseField_RTI(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID,
+                        const real_t W = 5)
+{
+   auto X              = blocks->getDomainCellBB().xMax();
+   auto halfY          = (blocks->getDomainCellBB().yMax()) / 2.0;
+   double perturbation = 0.05;
+
+   for (auto& block : *blocks)
+   {
+      auto phaseField = block.getData< PhaseField_T >(phaseFieldID);
+      // clang-format off
+      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(phaseField, Cell globalCell;
+         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+         real_t tmp =
+         perturbation * X * (cos((2.0 * math::pi * globalCell[0]) / X) + cos((2.0 * math::pi * globalCell[2]) / X));
+         phaseField->get(x, y, z) = 0.5 + 0.5 * tanh(((globalCell[1] - halfY) - tmp) / (W / 2.0));
+      )
+      // clang-format on
+   }
+}
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/InitializerFunctions.h b/apps/showcases/PhaseFieldAllenCahn/GPU/InitializerFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..04bac53f2e75d17ddd393da0e247b63df05d0704
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/InitializerFunctions.h
@@ -0,0 +1,39 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file InitializerFunctions.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "python_coupling/DictWrapper.h"
+#pragma once
+
+namespace walberla
+{
+void initPhaseField_sphere(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t R,
+                           Vector3< real_t > bubbleMidPoint, bool bubble = true, real_t W = 5);
+
+void initPhaseField_RTI(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID phaseFieldID, real_t W = 5);
+
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/PythonExports.cpp b/apps/showcases/PhaseFieldAllenCahn/GPU/PythonExports.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..40bb8c52bf9a4b320b3f5300c0714ef8bda06e43
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/PythonExports.cpp
@@ -0,0 +1,95 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PythonExports.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "waLBerlaDefinitions.h"
+#ifdef WALBERLA_BUILD_WITH_PYTHON
+
+#include "python_coupling/Manager.h"
+
+#include "core/math/Constants.h"
+
+#include "blockforest/python/Exports.h"
+
+#include "field/communication/PackInfo.h"
+#include "field/python/Exports.h"
+
+#include "geometry/python/Exports.h"
+
+#include "postprocessing/python/Exports.h"
+
+#include "timeloop/python/Exports.h"
+
+
+namespace walberla {
+    using flag_t = uint8_t;
+    // clang-format off
+    void exportDataStructuresToPython() {
+
+        namespace bmpl = boost::mpl;
+
+        auto pythonManager = python_coupling::Manager::instance();
+
+        typedef bmpl::vector<
+                Field<walberla::real_t, 1>,
+                Field<walberla::real_t, 3>,
+                Field<walberla::real_t, 9>,
+                Field<walberla::real_t, 19>,
+                Field<walberla::real_t, 27>>
+                FieldTypes;
+
+        typedef bmpl::vector<stencil::D2Q9,
+                stencil::D3Q19,
+                stencil::D3Q27>
+                Stencils;
+
+        typedef bmpl::vector<
+                GhostLayerField<real_t,1>,
+                GhostLayerField<real_t,3>>
+                RealFieldTypes;
+
+        typedef bmpl::vector<
+                FlagField<flag_t>>
+                FlagFieldTypes;
+        // Field
+        pythonManager->addExporterFunction(field::exportModuleToPython<FieldTypes>);
+        pythonManager->addExporterFunction(field::exportGatherFunctions<FieldTypes>);
+        pythonManager->addBlockDataConversion<FieldTypes>();
+
+        // Blockforest
+        pythonManager->addExporterFunction(blockforest::exportModuleToPython<Stencils>);
+
+        // Timeloop
+        pythonManager->addExporterFunction(timeloop::exportModuleToPython);
+
+        // Postprocessing
+        pythonManager->addExporterFunction( postprocessing::exportModuleToPython<RealFieldTypes, FlagFieldTypes> );
+
+        // Geometry
+        pythonManager->addExporterFunction( geometry::exportModuleToPython );
+    }
+   // clang-format on
+}
+
+#else
+
+namespace walberla {
+   void exportDataStructuresToPython() {}
+} // namespace walberla
+
+#endif
\ No newline at end of file
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/PythonExports.h b/apps/showcases/PhaseFieldAllenCahn/GPU/PythonExports.h
new file mode 100644
index 0000000000000000000000000000000000000000..07d2004372dfb18bd14ad45c071558c1d1f9ad33
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/PythonExports.h
@@ -0,0 +1,27 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PythonExports.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+namespace walberla
+{
+void exportDataStructuresToPython();
+
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/contact.cu b/apps/showcases/PhaseFieldAllenCahn/GPU/contact.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6a42483ba1376328e71bff03d8bb234dbfb0474d
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/contact.cu
@@ -0,0 +1,130 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file contact.cu
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+
+#include <cmath>
+
+#include "contact.h"
+
+#define FUNC_PREFIX __global__
+
+namespace walberla
+{
+namespace lbm
+{
+#ifdef __GNUC__
+#   pragma GCC diagnostic push
+#endif
+
+#ifdef __CUDACC__
+#   pragma push
+#endif
+
+namespace internal_boundary_contact
+{
+static FUNC_PREFIX void contact_angle_treatment(uint8_t* RESTRICT const _data_indexVector, double* RESTRICT _data_phase,
+                                                int64_t const _stride_phase_0, int64_t const _stride_phase_1,
+                                                int64_t const _stride_phase_2, int64_t indexVectorSize, double alpha)
+{
+   if (blockDim.x * blockIdx.x + threadIdx.x < indexVectorSize)
+   {
+      uint8_t* RESTRICT _data_indexVector_10 = _data_indexVector;
+      const int32_t x = *((int32_t*) (&_data_indexVector_10[24 * blockDim.x * blockIdx.x + 24 * threadIdx.x]));
+      uint8_t* RESTRICT _data_indexVector_14 = _data_indexVector + 4;
+      const int32_t y = *((int32_t*) (&_data_indexVector_14[24 * blockDim.x * blockIdx.x + 24 * threadIdx.x]));
+      uint8_t* RESTRICT _data_indexVector_18 = _data_indexVector + 8;
+      const int32_t z = *((int32_t*) (&_data_indexVector_18[24 * blockDim.x * blockIdx.x + 24 * threadIdx.x]));
+      uint8_t* RESTRICT _data_indexVector_112 = _data_indexVector + 12;
+      const int32_t nx = *((int32_t*) (&_data_indexVector_112[24 * blockDim.x * blockIdx.x + 24 * threadIdx.x]));
+      const int32_t x1 = x + nx;
+      uint8_t* RESTRICT _data_indexVector_116 = _data_indexVector + 16;
+      const int32_t ny = *((int32_t*) (&_data_indexVector_116[24 * blockDim.x * blockIdx.x + 24 * threadIdx.x]));
+      const int32_t y1 = y + ny;
+      uint8_t* RESTRICT _data_indexVector_200 = _data_indexVector + 20;
+      const int32_t nz = *((int32_t*) (&_data_indexVector_200[24 * blockDim.x * blockIdx.x + 24 * threadIdx.x]));
+      const int32_t z1 = z + nz;
+
+      const double h = 0.5 * sqrt((float) (nx * nx + ny * ny + nz * nz));
+      const double a = cos(alpha);
+      const double W = 5;
+
+      double* RESTRICT _phase_wall     = _data_phase + _stride_phase_1 * y + _stride_phase_2 * z;
+      double* RESTRICT _phase_interior = _data_phase + _stride_phase_1 * y1 + _stride_phase_2 * z1;
+      if (h < 0.001) { _phase_wall[_stride_phase_0 * x] = 1.0; }
+      else if (a > 1e-8 || a < -1e-8)
+      {
+         const double var = -h * (4.0 / W) * a;
+         _phase_wall[_stride_phase_0 * x] =
+            (1 + var - sqrt((1 + var) * (1 + var) - 4 * var * _phase_interior[_stride_phase_0 * x1])) / (var + 1e-12) -
+            _phase_interior[_stride_phase_0 * x1];
+      }
+      else
+      {
+         _phase_wall[_stride_phase_0 * x] = _phase_interior[_stride_phase_0 * x1];
+      }
+   }
+}
+} // namespace internal_boundary_contact
+
+#ifdef __GNUC__
+#   pragma GCC diagnostic pop
+#endif
+
+#ifdef __CUDACC__
+#   pragma pop
+#endif
+
+void contact::run(IBlock* block, IndexVectors::Type type, cudaStream_t stream)
+{
+   auto* indexVectors      = block->getData< IndexVectors >(indexVectorID);
+   int64_t indexVectorSize = int64_c(indexVectors->indexVector(type).size());
+   if (indexVectorSize == 0) return;
+
+   auto pointer = indexVectors->pointerGpu(type);
+
+   auto* _data_indexVector = reinterpret_cast< uint8_t* >(pointer);
+
+   auto phaseField = block->getData< cuda::GPUField< double > >(phaseFieldID);
+
+   auto& alpha = this->alpha_;
+   WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(phaseField->nrOfGhostLayers()))
+   double* RESTRICT _data_phase = phaseField->dataAt(0, 0, 0, 0);
+   const auto _stride_pdfs_0    = int64_t(phaseField->xStride());
+   const auto _stride_pdfs_1    = int64_t(phaseField->yStride());
+   const auto _stride_pdfs_2    = int64_t(phaseField->zStride());
+   dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
+   dim3 _grid(int(((indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ?
+                      (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) :
+                      ((int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize))) + 1)),
+              int(1), int(1));
+   internal_boundary_contact::contact_angle_treatment<<< _grid, _block, 0, stream >>>(
+      _data_indexVector, _data_phase, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, indexVectorSize, alpha);
+}
+
+void contact::operator()(IBlock* block, cudaStream_t stream) { run(block, IndexVectors::ALL, stream); }
+
+void contact::inner(IBlock* block, cudaStream_t stream) { run(block, IndexVectors::INNER, stream); }
+
+void contact::outer(IBlock* block, cudaStream_t stream) { run(block, IndexVectors::OUTER, stream); }
+
+} // namespace lbm
+} // namespace walberla
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/contact.h b/apps/showcases/PhaseFieldAllenCahn/GPU/contact.h
new file mode 100644
index 0000000000000000000000000000000000000000..2ce5deefbe2f41df6ed295d9a8e9a44e863f6756
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/contact.h
@@ -0,0 +1,166 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file contact.h
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/DataTypes.h"
+
+#include "cuda/GPUField.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "field/FlagField.h"
+
+#include <set>
+#include <vector>
+
+#ifdef __GNUC__
+#   define RESTRICT __restrict__
+#elif _MSC_VER
+#   define RESTRICT __restrict
+#else
+#   define RESTRICT
+#endif
+
+namespace walberla
+{
+namespace lbm
+{
+class contact
+{
+ public:
+   struct IndexInfo
+   {
+      int32_t x1;
+      int32_t y1;
+      int32_t z1;
+      int32_t x2;
+      int32_t y2;
+      int32_t z2;
+      IndexInfo(int32_t x1_, int32_t y1_, int32_t z1_, int32_t x2_, int32_t y2_, int32_t z2_)
+         : x1(x1_), y1(y1_), z1(z1_), x2(x2_), y2(y2_), z2(z2_)
+      {}
+      bool operator==(const IndexInfo& o) const
+      {
+         return x1 == o.x1 && y1 == o.y1 && z1 == o.z1 && x2 == o.x2 && y2 == o.y2 && z2 == o.z2;
+      }
+   };
+
+   class IndexVectors
+   {
+    public:
+      using CpuIndexVector = std::vector< IndexInfo >;
+
+      enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
+
+      IndexVectors() : cpuVectors_(NUM_TYPES) {}
+      bool operator==(IndexVectors& other) { return other.cpuVectors_ == cpuVectors_; }
+
+      ~IndexVectors()
+      {
+         for (auto& gpuVec : gpuVectors_)
+            cudaFree(gpuVec);
+      }
+
+      CpuIndexVector& indexVector(Type t) { return cpuVectors_[t]; }
+      IndexInfo* pointerCpu(Type t) { return &(cpuVectors_[t][0]); }
+
+      IndexInfo* pointerGpu(Type t) { return gpuVectors_[t]; }
+
+      void syncGPU()
+      {
+         gpuVectors_.resize(cpuVectors_.size());
+         for (size_t i = 0; i < size_t(NUM_TYPES); ++i)
+         {
+            auto& gpuVec = gpuVectors_[i];
+            auto& cpuVec = cpuVectors_[i];
+            cudaFree(gpuVec);
+            cudaMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size());
+            cudaMemcpy(gpuVec, &cpuVec[0], sizeof(IndexInfo) * cpuVec.size(), cudaMemcpyHostToDevice);
+         }
+      }
+
+    private:
+      std::vector< CpuIndexVector > cpuVectors_;
+
+      using GpuIndexVector = IndexInfo*;
+      std::vector< GpuIndexVector > gpuVectors_;
+   };
+
+   contact(const shared_ptr< StructuredBlockForest >& blocks, BlockDataID phaseFieldID_, double alpha)
+      : phaseFieldID(phaseFieldID_), alpha_(alpha)
+   {
+      auto createIdxVector = [](IBlock* const, StructuredBlockStorage* const) { return new IndexVectors(); };
+      indexVectorID = blocks->addStructuredBlockData< IndexVectors >(createIdxVector, "IndexField_hydro_NoSlip_gpu");
+   };
+
+   void operator()(IBlock* block, cudaStream_t stream = nullptr);
+   void inner(IBlock* block, cudaStream_t stream = nullptr);
+   void outer(IBlock* block, cudaStream_t stream = nullptr);
+
+   template< typename NormalField_T >
+   void fillFromNormalField(const shared_ptr< StructuredBlockForest >& blocks, ConstBlockDataID normalFieldID)
+   {
+      for (auto& block : *blocks)
+      {
+         auto* indexVectors     = block.getData< IndexVectors >(indexVectorID);
+         auto& indexVectorAll   = indexVectors->indexVector(IndexVectors::ALL);
+         auto& indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+         auto& indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+         auto* normalField = block.getData< NormalField_T >(normalFieldID);
+
+         auto inner = normalField->xyzSize();
+         inner.expand(cell_idx_t(-1));
+
+         indexVectorAll.clear();
+         indexVectorInner.clear();
+         indexVectorOuter.clear();
+         // clang-format off
+         WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(normalField, Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+            if (normalField->get(x, y, z, 0) != 0 || normalField->get(x, y, z, 1) != 0 || normalField->get(x, y, z, 2) != 0)
+               {
+                  auto element = IndexInfo(x, y, z, normalField->get(x, y, z, 0), normalField->get(x, y, z, 1), normalField->get(x, y, z, 2));
+                  indexVectorAll.push_back(element);
+                  if (inner.contains(x, y, z))
+                     indexVectorInner.push_back(element);
+                  else
+                     indexVectorOuter.push_back(element);
+               }
+         )
+         // clang-format off
+         indexVectors->syncGPU();
+      }
+   }
+
+ private:
+   void run(IBlock* block, IndexVectors::Type type, cudaStream_t stream = nullptr);
+
+   BlockDataID indexVectorID;
+
+ public:
+   BlockDataID phaseFieldID;
+   double alpha_;
+};
+
+} // namespace lbm
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py b/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py
new file mode 100755
index 0000000000000000000000000000000000000000..1b3e9d6a8e15642ea4855d6be13eb378bcb8cc1e
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py
@@ -0,0 +1,78 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 1000
+
+        # simulation parameters
+        self.timesteps = 10001
+        self.cells = (128, 64, 128)
+        self.blocks = (1, 1, 1)
+        self.periodic = (0, 0, 0)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.overlappingWidth = (8, 1, 1)
+        self.timeStepStrategy = 'normal'
+
+        self.contactAngle = 22
+
+        # bubble parameters
+        self.dropletRadius = 24.0
+        self.dropletMidPoint = (64, 24, 64)
+
+        # everything else
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+
+        self.counter = 0
+        self.yPositions = []
+
+    @wlb.member_callback
+    def config(self, **kwargs):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'remainingTimeLoggerFrequency': 10.0,
+                'scenario': self.scenario,
+                'contactAngle': self.contactAngle
+            },
+            'PhysicalParameters': {
+                'density_liquid': 1.0,
+                'density_gas': 0.001,
+                'surface_tension': 5e-5,
+                'mobility': 0.05,
+                'gravitational_acceleration': 0.0,
+                'relaxation_time_liquid': 3 * 0.166,
+                'relaxation_time_gas': 3 * 0.0166,
+            },
+            'Boundaries': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ],
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.dropletMidPoint,
+                'bubbleRadius': self.dropletRadius,
+                'bubble': False
+            },
+        }
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a0029b5408f093cfe5f581ce76102c00cc75d45c
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp
@@ -0,0 +1,377 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file multiphase.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformDirectScheme.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Constants.h"
+#include "core/timing/RemainingTimeLogger.h"
+
+#include "cuda/AddGPUFieldToStorage.h"
+#include "cuda/DeviceSelectMPI.h"
+#include "cuda/NVTX.h"
+#include "cuda/ParallelStreams.h"
+#include "cuda/communication/UniformGPUScheme.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/communication/PackInfo.h"
+#include "field/python/Exports.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/InitBoundaryHandling.h"
+
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/PythonCallback.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+#include "CalculateNormals.h"
+#include "InitializerFunctions.h"
+#include "PythonExports.h"
+#include "contact.h"
+
+//////////////////////////////
+// INCLUDE GENERATED FILES //
+////////////////////////////
+
+#include "GenDefines.h"
+#include "PackInfo_phase_field.h"
+#include "PackInfo_phase_field_distributions.h"
+#include "PackInfo_velocity_based_distributions.h"
+#include "hydro_LB_NoSlip.h"
+#include "hydro_LB_step.h"
+#include "initialize_phase_field_distributions.h"
+#include "initialize_velocity_based_distributions.h"
+#include "phase_field_LB_NoSlip.h"
+#include "phase_field_LB_step.h"
+#include "stream_hydro.h"
+
+////////////
+// USING //
+//////////
+
+using namespace walberla;
+
+using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >;
+using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >;
+using VelocityField_T  = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
+using NormalsField_T   = GhostLayerField< int8_t, Stencil_hydro_T::Dimension >;
+using PhaseField_T     = GhostLayerField< real_t, 1 >;
+using FlagField_T      = FlagField< uint8_t >;
+
+typedef cuda::GPUField< double > GPUField;
+
+int main(int argc, char** argv)
+{
+   mpi::Environment Env(argc, argv);
+   cuda::selectDeviceBasedOnMpiRank();
+   exportDataStructuresToPython();
+
+   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
+   {
+      WALBERLA_MPI_WORLD_BARRIER()
+
+      auto config = *cfg;
+      logging::configureLogging(config);
+      shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config);
+
+      ////////////////////////////
+      // ADD DOMAIN PARAMETERS //
+      //////////////////////////
+
+      auto domainSetup                = config->getOneBlock("DomainSetup");
+      Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+
+      ////////////////////////////////////////
+      // ADD GENERAL SIMULATION PARAMETERS //
+      //////////////////////////////////////
+
+      auto parameters                    = config->getOneBlock("Parameters");
+      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
+      const uint_t timesteps             = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+      const real_t remainingTimeLoggerFrequency =
+         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
+      const uint_t scenario  = parameters.getParameter< uint_t >("scenario", uint_c(1));
+      const real_t alpha     = parameters.getParameter< real_t >("contactAngle", real_c(90));
+      const real_t alpha_rad = alpha * (math::pi / 180);
+      Vector3< int > overlappingWidth =
+         parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1));
+      Vector3< int > gpuBlockSize =
+         parameters.getParameter< Vector3< int > >("gpuBlockSize", Vector3< int >(128, 1, 1));
+
+      /////////////////////////
+      // ADD DATA TO BLOCKS //
+      ///////////////////////
+
+      // CPU fields
+      BlockDataID vel_field   = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
+      BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx);
+      BlockDataID normals     = field::addToStorage< NormalsField_T >(blocks, "normals", int8_t(0), field::fzyx);
+      // GPU fields
+      BlockDataID lb_phase_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >(
+         blocks, "lb phase field on GPU", Stencil_phase_T::Size, field::fzyx, 1);
+      BlockDataID lb_velocity_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >(
+         blocks, "lb velocity field on GPU", Stencil_hydro_T::Size, field::fzyx, 1);
+      BlockDataID vel_field_gpu =
+         cuda::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true);
+      BlockDataID phase_field_gpu =
+         cuda::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true);
+      // Flag field
+      BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the phase-field")
+      if (scenario == 1)
+      {
+         auto bubbleParameters                  = config->getOneBlock("Bubble");
+         const Vector3< real_t > bubbleMidPoint = bubbleParameters.getParameter< Vector3< real_t > >("bubbleMidPoint");
+         const real_t bubbleRadius              = bubbleParameters.getParameter< real_t >("bubbleRadius", 20.0);
+         const bool bubble                      = bubbleParameters.getParameter< bool >("bubble", true);
+         initPhaseField_sphere(blocks, phase_field, bubbleRadius, bubbleMidPoint, bubble);
+      }
+      else if (scenario == 2)
+      {
+         initPhaseField_RTI(blocks, phase_field);
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the phase-field done")
+
+      /////////////////
+      // ADD SWEEPS //
+      ///////////////
+
+      auto physical_parameters     = config->getOneBlock("PhysicalParameters");
+      const real_t density_liquid  = physical_parameters.getParameter< real_t >("density_liquid", real_c(1.0));
+      const real_t density_gas     = physical_parameters.getParameter< real_t >("density_gas");
+      const real_t surface_tension = physical_parameters.getParameter< real_t >("surface_tension");
+      const real_t mobility        = physical_parameters.getParameter< real_t >("mobility");
+      const real_t gravitational_acceleration =
+         physical_parameters.getParameter< real_t >("gravitational_acceleration");
+      const real_t relaxation_time_liquid = physical_parameters.getParameter< real_t >("relaxation_time_liquid");
+      const real_t relaxation_time_gas    = physical_parameters.getParameter< real_t >("relaxation_time_gas");
+
+      pystencils::initialize_phase_field_distributions init_h(lb_phase_field_gpu, phase_field_gpu, vel_field_gpu);
+      pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);
+
+      pystencils::phase_field_LB_step phase_field_LB_step(
+         lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, mobility, gpuBlockSize[0], gpuBlockSize[1],
+         Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+
+      pystencils::hydro_LB_step hydro_LB_step(
+         lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gravitational_acceleration, density_liquid, density_gas,
+         surface_tension, relaxation_time_liquid, relaxation_time_gas, gpuBlockSize[0], gpuBlockSize[1],
+         Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+
+      pystencils::stream_hydro stream_hydro(lb_velocity_field_gpu, gpuBlockSize[0], gpuBlockSize[1],
+                                            Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+
+      ////////////////////////
+      // ADD COMMUNICATION //
+      //////////////////////
+
+      auto Comm_velocity_based_distributions =
+         make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      auto generatedPackInfo_velocity_based_distributions =
+         make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
+      Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+
+      auto Comm_phase_field = make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
+      Comm_phase_field->addPackInfo(generatedPackInfo_phase_field);
+
+      auto Comm_phase_field_distributions =
+         make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      auto generatedPackInfo_phase_field_distributions =
+         make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
+      Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+
+      ////////////////////////
+      // BOUNDARY HANDLING //
+      //////////////////////
+
+      const FlagUID fluidFlagUID("Fluid");
+      const FlagUID wallFlagUID("NoSlip");
+
+      auto boundariesConfig = config->getBlock("Boundaries");
+      if (boundariesConfig)
+      {
+         geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
+         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
+      }
+
+      calculate_normals(blocks, normals, flagFieldID, fluidFlagUID, wallFlagUID);
+      lbm::phase_field_LB_NoSlip phase_field_LB_NoSlip(blocks, lb_phase_field_gpu);
+      lbm::hydro_LB_NoSlip hydro_LB_NoSlip(blocks, lb_velocity_field_gpu);
+      lbm::contact contact_angle(blocks, phase_field_gpu, alpha_rad);
+
+      phase_field_LB_NoSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, wallFlagUID, fluidFlagUID);
+      hydro_LB_NoSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, wallFlagUID, fluidFlagUID);
+      contact_angle.fillFromNormalField< NormalsField_T >(blocks, normals);
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the normals-field done")
+
+      ////////////////
+      // TIME LOOP //
+      //////////////
+
+      int streamLowPriority  = 0;
+      int streamHighPriority = 0;
+      auto defaultStream     = cuda::StreamRAII::newPriorityStream(streamLowPriority);
+      auto innerOuterStreams = cuda::ParallelStreams(streamHighPriority);
+
+      auto timeLoop       = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
+      auto normalTimeStep = [&]() {
+         for (auto& block : *blocks)
+         {
+            Comm_phase_field_distributions->communicate(nullptr);
+            phase_field_LB_NoSlip(&block);
+
+            phase_field_LB_step(&block);
+            contact_angle(&block);
+            Comm_phase_field->communicate(nullptr);
+
+            hydro_LB_step(&block);
+
+            Comm_velocity_based_distributions->communicate(nullptr);
+            hydro_LB_NoSlip(&block);
+            stream_hydro(&block);
+         }
+      };
+      auto simpleOverlapTimeStep = [&]() {
+         for (auto& block : *blocks)
+            phase_field_LB_NoSlip(&block);
+
+         Comm_phase_field_distributions->startCommunication(defaultStream);
+         for (auto& block : *blocks)
+            phase_field_LB_step.inner(&block, defaultStream);
+         Comm_phase_field_distributions->wait(defaultStream);
+         for (auto& block : *blocks)
+            phase_field_LB_step.outer(&block, defaultStream);
+
+         for (auto& block : *blocks)
+            contact_angle(&block);
+
+         Comm_phase_field->startCommunication(defaultStream);
+         for (auto& block : *blocks)
+            hydro_LB_step.inner(&block, defaultStream);
+         Comm_phase_field->wait(defaultStream);
+         for (auto& block : *blocks)
+            hydro_LB_step.outer(&block, defaultStream);
+
+         for (auto& block : *blocks)
+            hydro_LB_NoSlip(&block);
+
+         Comm_velocity_based_distributions->startCommunication(defaultStream);
+         for (auto& block : *blocks)
+            stream_hydro.inner(&block, defaultStream);
+         Comm_velocity_based_distributions->wait(defaultStream);
+         for (auto& block : *blocks)
+            stream_hydro.outer(&block, defaultStream);
+      };
+      std::function< void() > timeStep;
+      if (timeStepStrategy == "overlap")
+      {
+         timeStep = std::function< void() >(simpleOverlapTimeStep);
+         WALBERLA_LOG_INFO_ON_ROOT("overlapping timestep")
+      }
+      else
+      {
+         timeStep = std::function< void() >(normalTimeStep);
+         WALBERLA_LOG_INFO_ON_ROOT("normal timestep with no overlapping")
+      }
+
+      timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
+
+      cuda::fieldCpy< GPUField, PhaseField_T >(blocks, phase_field_gpu, phase_field);
+
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs")
+      for (auto& block : *blocks)
+      {
+         init_h(&block);
+         init_g(&block);
+      }
+      WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs done")
+      uint_t dbWriteFrequency = parameters.getParameter< uint_t >("dbWriteFrequency", 10000000);
+
+      timeLoop->addFuncAfterTimeStep(
+         [&]() {
+            if (timeLoop->getCurrentTimeStep() % dbWriteFrequency == 0)
+            {
+               cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu);
+               python_coupling::PythonCallback callback("at_end_of_time_step");
+               if (callback.isCallable())
+               {
+                  callback.data().exposePtr("blocks", blocks);
+                  callback.data().exposePtr("time_loop", timeLoop);
+                  callback();
+               }
+            }
+         },
+         "Python callback");
+
+      // remaining time logger
+      timeLoop->addFuncAfterTimeStep(
+         timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency),
+         "remaining time logger");
+
+      uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
+      if (vtkWriteFrequency > 0)
+      {
+         const std::string path = "vtk_out";
+         auto vtkOutput         = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, path,
+                                                         "simulation_step", false, true, true, false, 0);
+         vtkOutput->addBeforeFunction([&]() {
+            cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu);
+            // cuda::fieldCpy<VelocityField_T, GPUField>( blocks, vel_field, vel_field_gpu );
+         });
+         auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "PhaseField");
+         vtkOutput->addCellDataWriter(phaseWriter);
+
+         // auto normlasWriter = make_shared<field::VTKWriter<NormalsField_T>>(normals, "Normals");
+         // vtkOutput->addCellDataWriter(normlasWriter);
+
+         // auto flagWriter = make_shared<field::VTKWriter<FlagField_T>>(flagFieldID, "flag");
+         // vtkOutput->addCellDataWriter(flagWriter);
+
+         // auto velWriter = make_shared<field::VTKWriter<VelocityField_T>>(vel_field, "Velocity");
+         // vtkOutput->addCellDataWriter(velWriter);
+
+         timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+      }
+
+      WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+      WcTimer simTimer;
+      simTimer.start();
+      timeLoop->run();
+
+      cudaDeviceSynchronize();
+
+      simTimer.end();
+      WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
+      auto time            = simTimer.last();
+      auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+      auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
+      WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess)
+      WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s")
+   }
+
+   return EXIT_SUCCESS;
+}
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py
new file mode 100755
index 0000000000000000000000000000000000000000..a7c5ed1c49e98279a3f77a296fe1716ad38232c1
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py
@@ -0,0 +1,134 @@
+import waLBerla as wlb
+import waLBerla.tools.sqlitedb as wlbSqlite
+from waLBerla.core_extension import makeSlice
+
+import numpy as np
+from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_parameters_rti
+
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 1000
+        self.dbWriteFrequency = 200
+
+        # simulation parameters
+        self.timesteps = 27001
+
+        self.cells = (64, 256, 64)
+        self.blocks = (1, 1, 1)
+        self.periodic = (1, 0, 1)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        # physical parameters
+        self.density_heavy = 1.0
+        self.reference_time = 6000
+        self.parameters = calculate_parameters_rti(reference_length=128,
+                                                   reference_time=self.reference_time,
+                                                   density_heavy=self.density_heavy,
+                                                   capillary_number=9.1,
+                                                   reynolds_number=128,
+                                                   atwood_number=1.0,
+                                                   peclet_number=140,
+                                                   density_ratio=3,
+                                                   viscosity_ratio=3)
+
+        # everything else
+        self.dbFile = "RTI.db"
+
+        self.scenario = 2  # 1 rising bubble, 2 RTI
+
+        self.counter = 0
+        self.yPositions = []
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'useGui': 0,
+                'remainingTimeLoggerFrequency': 10.0,
+                'scenario': self.scenario,
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.density_heavy,
+                'density_gas': self.parameters["density_light"],
+                'surface_tension': self.parameters["surface_tension"],
+                'mobility': self.parameters.get("mobility", 0.1),
+                'gravitational_acceleration': self.parameters["gravitational_acceleration"],
+                'relaxation_time_liquid': self.parameters.get("relaxation_time_heavy"),
+                'relaxation_time_gas': self.parameters.get("relaxation_time_light"),
+            },
+            'Boundaries': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                ]
+            },
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, time_loop):
+        t = time_loop.getCurrentTimeStep()
+        ny = self.size[1]
+        l0 = self.size[0]
+        if t % self.dbWriteFrequency == 0:
+            location_of_spike = -100
+            location_of_bubble = -100
+            location_of_saddle = -100
+            mass = -100
+            spike_data = wlb.field.gather(blocks, 'phase', makeSlice[self.size[0] // 2, :, self.size[2] // 2])
+            if spike_data:
+                spike_field = np.asarray(spike_data.buffer()).squeeze()
+                location_of_spike = (np.argmax(spike_field > 0.5) - ny//2)/l0
+
+            bubble_data = wlb.field.gather(blocks, 'phase', makeSlice[0, :, 0])
+            if bubble_data:
+                bubble_field = np.asarray(bubble_data.buffer()).squeeze()
+                location_of_bubble = (np.argmax(bubble_field > 0.5) - ny//2)/l0
+
+            saddle_data = wlb.field.gather(blocks, 'phase', makeSlice[0, :, self.size[2] // 2])
+            if saddle_data:
+                saddle_field = np.asarray(saddle_data.buffer()).squeeze()
+                location_of_saddle = (np.argmax(saddle_field > 0.5) - ny//2)/l0
+
+            phase = wlb.field.gather(blocks, 'phase', makeSlice[:, :, :])
+            if phase:
+                phase_field = np.asarray(phase.buffer()).squeeze()
+                mass = np.sum(phase_field)
+
+            self.write_result_to_database(t, location_of_spike, location_of_bubble, location_of_saddle, mass)
+
+    def write_result_to_database(self, t, spike, bubble, saddle, mass):
+        """Writes the simulation result stored in the global variables shapeFactors and angles to
+               an sqlite3 database, and resets the global variables."""
+        result = {'waLBerlaVersion': wlb.build_info.version,
+                  'xCells': self.size[0],
+                  'yCells': self.size[1],
+                  'zCells': self.size[2],
+                  'spike': spike,
+                  'bubble': bubble,
+                  'saddle': saddle,
+                  'mass': mass,
+                  'timesteps': t,
+                  'normalized_time': t / self.reference_time,
+                  'processes': wlb.mpi.numProcesses(),
+                  }
+        try:
+            wlbSqlite.checkAndUpdateSchema(result, 'interface_location', self.dbFile)
+            wlbSqlite.storeSingle(result, 'interface_location', self.dbFile)
+        except Exception as e:
+            wlb.log_warning("Failed to store run in database " + str(e) + "\n" + str(result))
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py
new file mode 100644
index 0000000000000000000000000000000000000000..b98348ab48e74f5198e3a7ceb065f23556e6b593
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py
@@ -0,0 +1,203 @@
+from pystencils import fields, TypedSymbol
+from pystencils.simp import sympy_cse
+from pystencils import AssignmentCollection
+
+from lbmpy.boundaries import NoSlip
+from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
+from lbmpy.stencils import get_stencil
+
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
+from lbmpy_walberla import generate_boundary
+
+from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
+     initializer_kernel_hydro_lb, interface_tracking_force, \
+     hydrodynamic_force, get_collision_assignments_hydro
+
+from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
+
+import numpy as np
+import sympy as sp
+
+stencil_phase = get_stencil("D3Q19")
+stencil_hydro = get_stencil("D3Q27")
+q_phase = len(stencil_phase)
+q_hydro = len(stencil_hydro)
+
+assert (len(stencil_phase[0]) == len(stencil_hydro[0]))
+dimensions = len(stencil_hydro[0])
+
+########################
+# PARAMETER DEFINITION #
+########################
+
+density_liquid = sp.Symbol("rho_H")
+density_gas = sp.Symbol("rho_L")
+
+surface_tension = sp.Symbol("sigma")
+mobility = sp.Symbol("mobility")
+
+gravitational_acceleration = sp.Symbol("gravity")
+
+relaxation_time_liquid = sp.Symbol("tau_H")
+relaxation_time_gas = sp.Symbol("tau_L")
+
+# phase-field parameter
+drho3 = (density_liquid - density_gas) / 3
+# interface thickness
+W = 5
+# coefficients related to surface tension
+beta = 12.0 * (surface_tension / W)
+kappa = 1.5 * surface_tension * W
+
+########################
+# FIELDS #
+########################
+
+# velocity field
+u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
+# phase-field
+C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
+
+flag = fields(f"flag_field: uint8[{dimensions}D]", layout='fzyx')
+# phase-field distribution functions
+h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
+h_tmp = fields(f"lb_phase_field_tmp({q_phase}): [{dimensions}D]", layout='fzyx')
+# hydrodynamic distribution functions
+g = fields(f"lb_velocity_field({q_hydro}): [{dimensions}D]", layout='fzyx')
+g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): [{dimensions}D]", layout='fzyx')
+
+########################################
+# RELAXATION RATES AND EXTERNAL FORCES #
+########################################
+
+# relaxation rate for interface tracking LB step
+relaxation_rate_allen_cahn = 1.0 / (0.5 + (3.0 * mobility))
+# calculate the relaxation rate for hydrodynamic LB step
+density = density_gas + C.center * (density_liquid - density_gas)
+# force acting on all phases of the model
+body_force = np.array([0, gravitational_acceleration * density, 0])
+# calculation of the relaxation time via viscosities
+# viscosity = viscosity_gas * viscosity_liquid + C.center\
+#             * (density_liquid*viscosity_liquid - viscosity_liquid*viscosity_gas)
+# relaxation_time = 3 * viscosity / density + 0.5
+
+relaxation_time = 0.5 + relaxation_time_gas + C.center * (relaxation_time_liquid - relaxation_time_gas)
+# calculate the relaxation time if the phase-field has values over one
+# relaxation_rate = 1.0 / relaxation_time
+relaxation_rate = sp.Symbol("s8")
+relaxation_rate_cutoff = sp.Piecewise((1 / (0.5 + relaxation_time_liquid), C.center > 0.999),   # True value
+                                      (1 / relaxation_time, True))                              # Else value
+
+###############
+# LBM METHODS #
+###############
+
+method_phase = create_lb_method(stencil=stencil_phase, method='srt',
+                                relaxation_rate=relaxation_rate_allen_cahn, compressible=True)
+
+method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True,
+                                relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1])
+
+# create the kernels for the initialization of the g and h field
+h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W, fd_stencil=get_stencil("D3Q27"))
+g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
+
+force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
+force_model_h = MultiphaseForceModel(force=force_h)
+
+force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force,
+                             fd_stencil=get_stencil("D3Q27"))
+
+####################
+# LBM UPDATE RULES #
+####################
+
+h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)]
+sum_h = np.sum(h_tmp_symbol_list[:])
+
+method_phase.set_force_model(force_model_h)
+
+phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
+                                            velocity_input=u,
+                                            compressible=True,
+                                            optimization={"symbolic_field": h,
+                                                          "symbolic_temporary_field": h_tmp},
+                                            kernel_type='stream_pull_collide')
+
+phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
+phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
+                                           subexpressions=phase_field_LB_step.subexpressions)
+phase_field_LB_step = sympy_cse(phase_field_LB_step)
+
+# ---------------------------------------------------------------------------------------------------------
+hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
+                                                density=density,
+                                                velocity_input=u,
+                                                force=force_g,
+                                                optimization={"symbolic_field": g,
+                                                              "symbolic_temporary_field": g_tmp},
+                                                kernel_type='collide_only')
+
+hydro_LB_step.set_sub_expressions_from_dict({**{relaxation_rate: relaxation_rate_cutoff},
+                                             **hydro_LB_step.subexpressions_dict})
+
+stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
+                                     optimization={"symbolic_field": g,
+                                                   "symbolic_temporary_field": g_tmp},
+                                     kernel_type='stream_pull_only')
+
+###################
+# GENERATE SWEEPS #
+###################
+
+vp = [('int32_t', 'cudaBlockSize0'),
+      ('int32_t', 'cudaBlockSize1')]
+
+sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
+                    TypedSymbol("cudaBlockSize1", np.int32),
+                    1)
+sweep_params = {'block_size': sweep_block_size}
+
+info_header = f"""
+#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase}; 
+#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro};
+"""
+
+with CodeGeneration() as ctx:
+    generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates, target='gpu')
+    generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target='gpu')
+
+    generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
+                   field_swaps=[(h, h_tmp)],
+                   inner_outer_split=True,
+                   target='gpu',
+                   gpu_indexing_params=sweep_params,
+                   varying_parameters=vp)
+    generate_boundary(ctx, 'phase_field_LB_NoSlip', NoSlip(), method_phase, target='gpu')
+
+    generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
+                   inner_outer_split=True,
+                   target='gpu',
+                   gpu_indexing_params=sweep_params,
+                   varying_parameters=vp)
+    generate_boundary(ctx, 'hydro_LB_NoSlip', NoSlip(), method_hydro, target='gpu')
+
+    generate_sweep(ctx, 'stream_hydro', stream_hydro,
+                   field_swaps=[(g, g_tmp)],
+                   inner_outer_split=True,
+                   target='gpu',
+                   gpu_indexing_params=sweep_params,
+                   varying_parameters=vp)
+
+    # communication
+
+    generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
+                                   phase_field_LB_step.main_assignments, target='gpu')
+    generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
+                                   hydro_LB_step.all_assignments, target='gpu', kind='pull')
+    generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
+                                   stream_hydro.all_assignments, target='gpu', kind='pull')
+
+    ctx.write_file("GenDefines.h", info_header)
+
+print("finished code generation successfully")
diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py
new file mode 100755
index 0000000000000000000000000000000000000000..4294fc7b82519002f765a34f1a7eb81b8b42718b
--- /dev/null
+++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py
@@ -0,0 +1,138 @@
+import waLBerla as wlb
+import waLBerla.tools.sqlitedb as wlbSqlite
+import numpy as np
+
+from waLBerla.core_extension import makeSlice
+from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_dimensionless_rising_bubble
+
+
+class Scenario:
+    def __init__(self):
+        # output frequencies
+        self.vtkWriteFrequency = 1000
+        self.dbWriteFrequency = 200
+
+        # simulation parameters
+        self.timesteps = 10000
+        self.cells = (64, 128, 64)
+        self.blocks = (1, 1, 1)
+        self.periodic = (0, 0, 0)
+        self.size = (self.cells[0] * self.blocks[0],
+                     self.cells[1] * self.blocks[1],
+                     self.cells[2] * self.blocks[2])
+
+        self.overlappingWidth = (8, 1, 1)
+        self.timeStepStrategy = 'normal'
+
+        # physical parameters
+        self.bubbleRadius = 16
+        self.bubbleMidPoint = (self.size[0] / 2, self.bubbleRadius + 10, self.size[2] / 2)
+
+        # physical parameters
+        self.density_heavy = 1.0
+        self.reference_time = 18000
+        self.parameters = calculate_dimensionless_rising_bubble(reference_time=self.reference_time,
+                                                                density_heavy=self.density_heavy,
+                                                                bubble_radius=self.bubbleRadius,
+                                                                bond_number=1,
+                                                                reynolds_number=40,
+                                                                density_ratio=1000,
+                                                                viscosity_ratio=100)
+
+        # everything else
+        self.dbFile = "risingBubble3D.db"
+
+        self.scenario = 1  # 1 rising bubble, 2 RTI
+
+        self.counter = 0
+        self.yPositions = []
+
+    @wlb.member_callback
+    def config(self):
+        return {
+            'DomainSetup': {
+                'blocks': self.blocks,
+                'cellsPerBlock': self.cells,
+                'periodic': self.periodic,
+            },
+            'Parameters': {
+                'timesteps': self.timesteps,
+                'vtkWriteFrequency': self.vtkWriteFrequency,
+                'dbWriteFrequency': self.dbWriteFrequency,
+                'timeStepStrategy': self.timeStepStrategy,
+                'overlappingWidth': self.overlappingWidth,
+                'remainingTimeLoggerFrequency': 10.0,
+                'scenario': self.scenario,
+            },
+            'PhysicalParameters': {
+                'density_liquid': self.density_heavy,
+                'density_gas': self.parameters["density_light"],
+                'surface_tension': self.parameters["surface_tension"],
+                'mobility': self.parameters.get("mobility", 0.1),
+                'gravitational_acceleration': self.parameters["gravitational_acceleration"],
+                'relaxation_time_liquid': self.parameters.get("relaxation_time_heavy"),
+                'relaxation_time_gas': self.parameters.get("relaxation_time_light"),
+            },
+            'Boundaries': {
+                'Border': [
+                    {'direction': 'N', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
+                    {'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
+                ]
+            },
+            'Bubble': {
+                'bubbleMidPoint': self.bubbleMidPoint,
+                'bubbleRadius': self.bubbleRadius,
+                'bubble': True
+            },
+        }
+
+    @wlb.member_callback
+    def at_end_of_time_step(self, blocks, time_loop):
+        t = time_loop.getCurrentTimeStep()
+        if t % self.dbWriteFrequency == 0:
+            wlb_field = wlb.field.gather(blocks, 'phase', makeSlice[:, :, self.size[2] // 2])
+            if wlb_field:
+                phase_field = np.asarray(wlb_field.buffer()).squeeze()
+
+                location_of_gas = np.where(phase_field < 0.5)
+                cov = np.cov(location_of_gas)
+                eig = np.linalg.eig(cov)
+                axis_of_the_bubble = np.sqrt(eig[0])
+
+                center_of_mass = np.mean(location_of_gas, axis=1)
+                self.yPositions.append(center_of_mass[1])
+                if len(self.yPositions) > 1:
+                    speed = self.yPositions[-1] - self.yPositions[-2]
+                    self.write_result_to_database(t, speed, axis_of_the_bubble, center_of_mass)
+
+                self.counter += 1
+
+    def write_result_to_database(self, t, speed, axis_of_the_bubble, center_of_mass):
+        """Writes the simulation result stored in the global variables shapeFactors and angles to
+               an sqlite3 database, and resets the global variables."""
+        result = {'waLBerlaVersion': wlb.build_info.version,
+                  'xCells': self.size[0],
+                  'yCells': self.size[1],
+                  'zCells': self.size[2],
+                  'bubbleDiameter': self.bubbleRadius * 2.0,
+                  'bubble_axis_x': axis_of_the_bubble[0],
+                  'bubble_axis_y': axis_of_the_bubble[1],
+                  'center_of_mass_x': center_of_mass[0],
+                  'center_of_mass_y': center_of_mass[1],
+                  'rising_speed': speed,
+                  'timesteps': t,
+                  'processes': wlb.mpi.numProcesses(),
+                  }
+        try:
+            wlbSqlite.checkAndUpdateSchema(result, 'data', self.dbFile)
+            wlbSqlite.storeSingle(result, 'data', self.dbFile)
+        except Exception as e:
+            wlb.log_warning("Failed to store run in database " + str(e) + "\n" + str(result))
+
+
+scenarios = wlb.ScenarioManager()
+scenarios.add(Scenario())
diff --git a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp
index 24fd293700989b45229d48108aea6036e2dc913a..41dd20ed21af9652606d9215faf8351dcd3118b7 100644
--- a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp
+++ b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp
@@ -3,6 +3,14 @@
 #include "core/DataTypes.h"
 #include "{{class_name}}.h"
 
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#   pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+
 {% for header in headers %}
 #include {{header}}
 {% endfor %}
diff --git a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
index 366a29596d22d4155a190a9d9960a4566c32e334..6aec87cacbf3e1689ad07659f50e2b7d9c8206b1 100644
--- a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
+++ b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
@@ -128,6 +128,7 @@ void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaS
     {% else %}
     for( auto & ci: layers_ )
     {
+        {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(8) }}
         {{kernel|generate_call(cell_interval='ci')|indent(8)}}
     }
     {% endif %}