diff --git a/apps/benchmarks/FluidParticleCoupling/DragForceSphere.cpp b/apps/benchmarks/FluidParticleCoupling/DragForceSphere.cpp
index a28651957a527509cc3575172614ff2242a6c869..37921bf9cf99629576040bca5c106fd9eaf84711 100644
--- a/apps/benchmarks/FluidParticleCoupling/DragForceSphere.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/DragForceSphere.cpp
@@ -79,6 +79,11 @@
 
 #ifdef WALBERLA_BUILD_WITH_CODEGEN
 #include "GeneratedLBMWithForce.h"
+
+#define USE_TRTLIKE
+//#define USE_D3Q27TRTLIKE
+//#define USE_CUMULANTTRT
+//#define USE_CUMULANT
 #endif
 
 namespace drag_force_sphere_mem
@@ -529,17 +534,27 @@ int main( int argc, char **argv )
    real_t omegaBulk = (useSRT) ? lambda_e : lbm_mesapd_coupling::omegaBulkFromOmega(omega, bulkViscRateFactor);
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx);
 
    // create the lattice model
 #ifdef WALBERLA_BUILD_WITH_CODEGEN
+
+#if defined(USE_TRTLIKE) || defined(USE_D3Q27TRTLIKE)
    WALBERLA_LOG_INFO_ON_ROOT("Using generated TRT-like lattice model!");
    WALBERLA_LOG_INFO_ON_ROOT(" - magic number " << magicNumber);
    WALBERLA_LOG_INFO_ON_ROOT(" - omegaBulk = " << omegaBulk << ", bulk visc. = " << lbm_mesapd_coupling::bulkViscosityFromOmegaBulk(omegaBulk) << " (bvrf " << bulkViscRateFactor << ")");
    WALBERLA_LOG_INFO_ON_ROOT(" - lambda_e " << lambda_e << ", lambda_d " << lambda_d << ", omegaBulk " << omegaBulk );
    WALBERLA_LOG_INFO_ON_ROOT(" - use omega bulk adaption = " << useOmegaBulkAdaption << " (adaption layer size = " << adaptionLayerSize << ")");
-
    LatticeModel_T latticeModel = LatticeModel_T(omegaBulkFieldID, setup.extForce, lambda_d, lambda_e);
+#elif defined(USE_CUMULANTTRT)
+   WALBERLA_LOG_INFO_ON_ROOT("Using generated cumulant TRT lattice model!");
+   WALBERLA_LOG_INFO_ON_ROOT(" - magic number " << magicNumber);
+   WALBERLA_LOG_INFO_ON_ROOT(" - lambda_e " << lambda_e << ", lambda_d " << lambda_d );
+   LatticeModel_T latticeModel = LatticeModel_T(setup.extForce, lambda_d, lambda_e);
+#elif defined(USE_CUMULANT)
+   LatticeModel_T latticeModel = LatticeModel_T(setup.extForce, omega);
+#endif
+
 #else
    WALBERLA_LOG_INFO_ON_ROOT("Using waLBerla built-in MRT lattice model and ignoring omega bulk field since not supported!");
    WALBERLA_LOG_INFO_ON_ROOT(" - magic number " << magicNumber);
diff --git a/apps/benchmarks/FluidParticleCoupling/ForcesOnSphereNearPlane.cpp b/apps/benchmarks/FluidParticleCoupling/ForcesOnSphereNearPlane.cpp
index ecf14716ca36f464bf00c482e0a83b8c63c41856..ee3c0f1431a0c73d137c1bd07943729bab44ba8b 100644
--- a/apps/benchmarks/FluidParticleCoupling/ForcesOnSphereNearPlane.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/ForcesOnSphereNearPlane.cpp
@@ -407,6 +407,7 @@ int main( int argc, char **argv )
    bool initializeVelocityProfile = false;
    bool useOmegaBulkAdaption = false;
    real_t adaptionLayerSize = real_t(2);
+   std::string boundaryCondition = "CLI"; // SBB, CLI
 
    real_t relativeChangeConvergenceEps = real_t(1e-5);
    real_t physicalCheckingFrequency = real_t(0.1);
@@ -420,6 +421,7 @@ int main( int argc, char **argv )
       if( std::strcmp( argv[i], "--timesteps" )                 == 0 ) { maximumNonDimTimesteps = real_c( std::atof( argv[++i] ) ); continue; }
       if( std::strcmp( argv[i], "--wallDistance" )              == 0 ) { normalizedWallDistance = real_c( std::atof( argv[++i] ) ); continue; }
       if( std::strcmp( argv[i], "--Re" )                        == 0 ) { ReynoldsNumberShear = real_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--boundaryCondition" )         == 0 ) { boundaryCondition = argv[++i]; continue; }
       if( std::strcmp( argv[i], "--velocity" )                  == 0 ) { wallVelocity = real_c( std::atof( argv[++i] ) ); continue; }
       if( std::strcmp( argv[i], "--xOffset" )                   == 0 ) { xOffsetOfSpherePosition = real_c( std::atof( argv[++i] ) ); continue; }
       if( std::strcmp( argv[i], "--yOffset" )                   == 0 ) { yOffsetOfSpherePosition = real_c( std::atof( argv[++i] ) ); continue; }
@@ -438,6 +440,7 @@ int main( int argc, char **argv )
    WALBERLA_CHECK_GREATER_EQUAL(normalizedWallDistance, real_t(0.5));
    WALBERLA_CHECK_GREATER_EQUAL(ReynoldsNumberShear, real_t(0));
    WALBERLA_CHECK_GREATER_EQUAL(diameter, real_t(0));
+   WALBERLA_CHECK(boundaryCondition == "SBB" || boundaryCondition == "CLI");
 
    //////////////////////////
    // NUMERICAL PARAMETERS //
@@ -484,6 +487,7 @@ int main( int argc, char **argv )
    WALBERLA_LOG_INFO_ON_ROOT(" - use omega bulk adaption = " << useOmegaBulkAdaption << " (adaption layer size = " << adaptionLayerSize << ")");
    WALBERLA_LOG_INFO_ON_ROOT(" - sphere diameter = " << diameter << ", position = " << initialPosition << " ( xOffset = " << xOffsetOfSpherePosition << ", yOffset = " << yOffsetOfSpherePosition << " )");
    WALBERLA_LOG_INFO_ON_ROOT(" - base folder VTK = " << baseFolderVTK << ", base folder logging = " << baseFolderLogging );
+   WALBERLA_LOG_INFO_ON_ROOT(" - boundary condition = " << boundaryCondition );
 
    ///////////////////////////
    // BLOCK STRUCTURE SETUP //
@@ -562,7 +566,7 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx);
 
    // create the lattice model
    real_t lambda_e = lbm::collision_model::TRT::lambda_e( omega );
@@ -594,11 +598,21 @@ int main( int argc, char **argv )
    lbm_mesapd_coupling::MovingParticleMappingKernel<BoundaryHandling_T> movingParticleMappingKernel(blocks, boundaryHandlingID, particleFieldID);
    lbm_mesapd_coupling::AverageHydrodynamicForceTorqueKernel averageHydrodynamicForceTorque;
 
-   // map sphere into the LBM simulation
-   ps->forEachParticle(false, lbm_mesapd_coupling::RegularParticlesSelector(), *accessor, movingParticleMappingKernel, *accessor, CLI_Flag);
+   if(boundaryCondition == "CLI")
+   {
+      // map sphere into the LBM simulation
+      ps->forEachParticle(false, lbm_mesapd_coupling::RegularParticlesSelector(), *accessor, movingParticleMappingKernel, *accessor, CLI_Flag);
+      // map planes
+      ps->forEachParticle(false, lbm_mesapd_coupling::GlobalParticlesSelector(), *accessor, movingParticleMappingKernel, *accessor, CLI_Flag);
+   }
+   else
+   {
+      // map sphere into the LBM simulation
+      ps->forEachParticle(false, lbm_mesapd_coupling::RegularParticlesSelector(), *accessor, movingParticleMappingKernel, *accessor, SBB_Flag);
+      // map planes
+      ps->forEachParticle(false, lbm_mesapd_coupling::GlobalParticlesSelector(), *accessor, movingParticleMappingKernel, *accessor, SBB_Flag);
+   }
 
-   // map planes
-   ps->forEachParticle(false, lbm_mesapd_coupling::GlobalParticlesSelector(), *accessor, movingParticleMappingKernel, *accessor, CLI_Flag);
 
    // initialize Couette velocity profile in whole domain
    if(initializeVelocityProfile)
@@ -661,13 +675,13 @@ int main( int argc, char **argv )
    timeloop.add() << Sweep( makeSharedSweep( lbmSweep ), "cell-wise LB sweep" );
 #endif
 
-
    // add force evaluation and logging
    real_t normalizationFactor = math::pi / real_t(8) * densityFluid * shearRate * shearRate * wallDistance * wallDistance * diameter * diameter ;
    std::string loggingFileName( baseFolderLogging + "/LoggingForcesNearPlane");
    loggingFileName += "_D" + std::to_string(uint_c(diameter));
    loggingFileName += "_Re" + std::to_string(uint_c(ReynoldsNumberShear));
    loggingFileName += "_WD" + std::to_string(uint_c(normalizedWallDistance * real_t(1000)));
+   loggingFileName += "_" + boundaryCondition;
    loggingFileName += "_bvrf" + std::to_string(uint_c(bulkViscRateFactor));
    if(useOmegaBulkAdaption) loggingFileName += "_uOBA" + std::to_string(uint_c(adaptionLayerSize));
    loggingFileName += "_mn" + std::to_string(magicNumber);
@@ -796,6 +810,7 @@ int main( int argc, char **argv )
    resultFileName += "_D" + std::to_string(uint_c(diameter));
    resultFileName += "_Re" + std::to_string(uint_c(ReynoldsNumberShear));
    resultFileName += "_WD" + std::to_string(uint_c(normalizedWallDistance * real_t(1000)));
+   resultFileName += "_" + boundaryCondition;
    resultFileName += "_bvrf" + std::to_string(uint_c(bulkViscRateFactor));
    if(useOmegaBulkAdaption) resultFileName += "_uOBA" + std::to_string(uint_c(adaptionLayerSize));
    resultFileName += "_mn" + std::to_string(magicNumber);
diff --git a/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py b/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py
index 7992bcdd92bbbbd99cd4a3ad9a948dbcd8eb7e8d..9772c5ba12edab54835a9a6ccc6dbab9c156443b 100644
--- a/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py
+++ b/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py
@@ -1,46 +1,140 @@
 import sympy as sp
+from sympy.core.cache import clear_cache
 import pystencils as ps
 from lbmpy.creationfunctions import create_lb_method_from_existing, create_lb_method
 from lbmpy_walberla import generate_lattice_model
 from pystencils_walberla import CodeGeneration
+from pystencils_walberla import get_vectorize_instruction_set
 
 from lbmpy.creationfunctions import create_lb_collision_rule
 from lbmpy.moments import is_even, get_order, MOMENT_SYMBOLS
 from lbmpy.stencils import get_stencil
 
+from collections import OrderedDict
 
 with CodeGeneration() as ctx:
 
-    omegaVisc = sp.Symbol("omega_visc")
-    omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
-    omegaMagic = sp.Symbol("omega_magic")
-    stencil = get_stencil("D3Q19", 'walberla')
+    generatedMethod = "TRTlike"
+    #generatedMethod = "D3Q27TRTlike"
+    #generatedMethod = "cumulant"
 
-    x, y, z = MOMENT_SYMBOLS
-    one = sp.Rational(1, 1)
-    sq = x ** 2 + y ** 2 + z ** 2
-    moments = [
-        [one, x, y, z],  # [0, 3, 5, 7]
-        [sq - 1],  # [1]
-        [3 * sq ** 2 - 6 * sq + 1],  # [2]
-        [(3 * sq - 5) * x, (3 * sq - 5) * y, (3 * sq - 5) * z],  # [4, 6, 8]
-        [3 * x ** 2 - sq, y ** 2 - z ** 2, x * y, y * z, x * z],  # [9, 11, 13, 14, 15]
-        [(2 * sq - 3) * (3 * x ** 2 - sq), (2 * sq - 3) * (y ** 2 - z ** 2)],  # [10, 12]
-        [(y ** 2 - z ** 2) * x, (z ** 2 - x ** 2) * y, (x ** 2 - y ** 2) * z]  # [16, 17, 18]
-    ]
+    clear_cache()
 
-    # relaxation rate for first group of moments (1,x,y,z) is set to zero internally
-    relaxation_rates=[omegaBulk.center_vector, omegaBulk.center_vector, omegaMagic, omegaVisc, omegaVisc, omegaMagic]
+    cpu_vectorize_info = {'instruction_set': get_vectorize_instruction_set(ctx)}
 
-    methodWithForce = create_lb_method(stencil=stencil, method='mrt', maxwellian_moments=False,
-                                       nested_moments=moments, relaxation_rates=relaxation_rates)
+    if generatedMethod == "TRTlike":
+        omegaVisc = sp.Symbol("omega_visc")
+        omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
+        omegaMagic = sp.Symbol("omega_magic")
+        stencil = get_stencil("D3Q19", 'walberla')
 
-    #print(methodWithForce.relaxation_rates)
-    #print(methodWithForce.moment_matrix)
+        x, y, z = MOMENT_SYMBOLS
+        one = sp.Rational(1, 1)
+        sq = x ** 2 + y ** 2 + z ** 2
+        moments = [
+            [one, x, y, z],  # [0, 3, 5, 7]
+            [sq - 1],  # [1]
+            [3 * sq ** 2 - 6 * sq + 1],  # [2]
+            [(3 * sq - 5) * x, (3 * sq - 5) * y, (3 * sq - 5) * z],  # [4, 6, 8]
+            [3 * x ** 2 - sq, y ** 2 - z ** 2, x * y, y * z, x * z],  # [9, 11, 13, 14, 15]
+            [(2 * sq - 3) * (3 * x ** 2 - sq), (2 * sq - 3) * (y ** 2 - z ** 2)],  # [10, 12]
+            [(y ** 2 - z ** 2) * x, (z ** 2 - x ** 2) * y, (x ** 2 - y ** 2) * z]  # [16, 17, 18]
+        ]
 
-    collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
-    generate_lattice_model(ctx, 'GeneratedLBM', collision_rule, field_layout='fzyx')
+        # relaxation rate for first group of moments (1,x,y,z) is set to zero internally
+        relaxation_rates=[omegaBulk.center_vector, omegaBulk.center_vector, omegaMagic, omegaVisc, omegaVisc, omegaMagic]
 
+        methodWithForce = create_lb_method(stencil=stencil, method='mrt', maxwellian_moments=False,
+                                           nested_moments=moments, relaxation_rates=relaxation_rates)
+
+        #print(methodWithForce.relaxation_rates)
+        #print(methodWithForce.moment_matrix)
+        collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
+        generate_lattice_model(ctx, 'GeneratedLBM', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
+
+ 
+    if generatedMethod == "D3Q27TRTlike":
+
+        omegaVisc = sp.Symbol("omega_visc")
+        omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
+        omegaMagic = sp.Symbol("omega_magic")
+        stencil = get_stencil("D3Q27", 'walberla')
+
+        relaxation_rates=[omegaVisc, omegaBulk.center_vector, omegaMagic, omegaVisc, omegaMagic, omegaVisc]
+
+        methodWithForce = create_lb_method(stencil=stencil, method='mrt', maxwellian_moments=False,weighted=True, compressible=False,
+                                           relaxation_rates=relaxation_rates)
+
+        collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
+        generate_lattice_model(ctx, 'GeneratedLBM', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
+
+    if generatedMethod == "cumulant":
+
+        x,y,z = MOMENT_SYMBOLS
+
+        cumulants = [0] * 27
+
+        cumulants[0] = sp.sympify(1) #000
+
+        cumulants[1] = x #100
+        cumulants[2] = y #010
+        cumulants[3] = z #001
+
+        cumulants[4] = x*y #110
+        cumulants[5] = x*z #101
+        cumulants[6] = y*z #011
+
+        cumulants[7] = x**2 - y**2 #200 - 020
+        cumulants[8] = x**2 - z**2 #200 - 002
+        cumulants[9] = x**2 + y**2 + z**2 #200 + 020 + 002
+
+        cumulants[10] = x*y**2 + x*z**2 #120 + 102
+        cumulants[11] = x**2*y + y*z**2 #210 + 012
+        cumulants[12] = x**2*z + y**2*z #201 + 021
+        cumulants[13] = x*y**2 - x*z**2 #120 - 102
+        cumulants[14] = x**2*y - y*z**2 #210 - 012
+        cumulants[15] = x**2*z - y**2*z #201 - 021
+
+        cumulants[16] = x*y*z #111
+
+        cumulants[17] = x**2*y**2 - 2*x**2*z**2 + y**2*z**2 # 220- 2*202 +022
+        cumulants[18] = x**2*y**2 + x**2*z**2 - 2*y**2*z**2 # 220 + 202 - 2*002
+        cumulants[19] = x**2*y**2 + x**2*z**2 + y**2*z**2 # 220 + 202 + 022
+
+        cumulants[20] = x**2*y*z # 211
+        cumulants[21] = x*y**2*z # 121
+        cumulants[22] = x*y*z**2 # 112
+
+        cumulants[23] = x**2*y**2*z # 221
+        cumulants[24] = x**2*y*z**2 # 212
+        cumulants[25] = x*y**2*z**2 # 122
+
+        cumulants[26] = x**2*y**2*z**2 # 222
+
+        def get_relaxation_rate(cumulant, omega):
+            if get_order(cumulant) <= 1:
+                return 0
+            elif get_order(cumulant) == 2 and cumulant != x**2 + y**2 + z**2:
+                return omega
+            else:
+                return 1
+
+        stencil = get_stencil("D3Q27", 'walberla')
+
+        omega = sp.Symbol("omega")
+        rr_dict = OrderedDict((c, get_relaxation_rate(c, omega))
+                              for c in cumulants)
+
+        from lbmpy.methods import create_with_continuous_maxwellian_eq_moments
+        my_method = create_with_continuous_maxwellian_eq_moments(stencil, rr_dict, cumulant=True, compressible=True,)
+
+        collision_rule = create_lb_collision_rule(lb_method=my_method,
+                                                  optimization={"cse_global": True,
+                                                                "cse_pdfs": False})
+
+        print(my_method.relaxation_rates)
+
+        generate_lattice_model(ctx, 'GeneratedLBM', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
 
 
 
diff --git a/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py b/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py
index 9162d1cfeb55b6f850292ea3ce7fa6edac6c5aef..6f720252539ee4be59565902042128239b94611f 100644
--- a/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py
+++ b/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py
@@ -1,45 +1,215 @@
 import sympy as sp
+from sympy.core.cache import clear_cache
 import pystencils as ps
-from lbmpy.creationfunctions import create_lb_method_from_existing, create_lb_method
+from lbmpy.creationfunctions import create_lb_method_from_existing, create_lb_ast, create_lb_method
 from lbmpy_walberla import generate_lattice_model
 from pystencils_walberla import CodeGeneration
+from pystencils_walberla import get_vectorize_instruction_set
 
 from lbmpy.creationfunctions import create_lb_collision_rule
-from lbmpy.moments import is_even, get_order, MOMENT_SYMBOLS
+from lbmpy.moments import MOMENT_SYMBOLS, is_even, get_order
 from lbmpy.stencils import get_stencil
 from lbmpy.forcemodels import Luo
 
+from collections import OrderedDict
+
 with CodeGeneration() as ctx:
 
     forcing=(sp.symbols("fx"),0,0)
-    omegaVisc = sp.Symbol("omega_visc")
-    omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
-    omegaMagic = sp.Symbol("omega_magic")
-    stencil = get_stencil("D3Q19", 'walberla')
-
     forcemodel=Luo(forcing)
 
-    x, y, z = MOMENT_SYMBOLS
-    one = sp.Rational(1, 1)
-    sq = x ** 2 + y ** 2 + z ** 2
-    moments = [
-        [one, x, y, z],  # [0, 3, 5, 7]
-        [sq - 1],  # [1]
-        [3 * sq ** 2 - 6 * sq + 1],  # [2]
-        [(3 * sq - 5) * x, (3 * sq - 5) * y, (3 * sq - 5) * z],  # [4, 6, 8]
-        [3 * x ** 2 - sq, y ** 2 - z ** 2, x * y, y * z, x * z],  # [9, 11, 13, 14, 15]
-        [(2 * sq - 3) * (3 * x ** 2 - sq), (2 * sq - 3) * (y ** 2 - z ** 2)],  # [10, 12]
-        [(y ** 2 - z ** 2) * x, (z ** 2 - x ** 2) * y, (x ** 2 - y ** 2) * z]  # [16, 17, 18]
-    ]
-
-    # relaxation rate for first group of moments (1,x,y,z) is set to zero internally
-    relaxation_rates=[omegaBulk.center_vector, omegaBulk.center_vector, omegaMagic, omegaVisc, omegaVisc, omegaMagic]
-
-    methodWithForce = create_lb_method(stencil=stencil, method='mrt', maxwellian_moments=False,
-                                       force_model=forcemodel, nested_moments=moments, relaxation_rates=relaxation_rates)
-
-    #print(methodWithForce.relaxation_rates)
-    #print(methodWithForce.moment_matrix)
-
-    collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
-    generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx')
+    generatedMethod = "TRTlike"
+    #generatedMethod = "D3Q27TRTlike"
+    #generatedMethod = "cumulant"
+    #generatedMethod = "cumulantTRT"
+
+    print("Generating " + generatedMethod + " LBM method")
+
+    clear_cache()
+
+    cpu_vectorize_info = {'instruction_set': get_vectorize_instruction_set(ctx)}
+
+    if generatedMethod == "TRTlike":
+        omegaVisc = sp.Symbol("omega_visc")
+        omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
+        omegaMagic = sp.Symbol("omega_magic")
+        stencil = get_stencil("D3Q19", 'walberla')
+
+        x, y, z = MOMENT_SYMBOLS
+        one = sp.Rational(1, 1)
+        sq = x ** 2 + y ** 2 + z ** 2
+        moments = [
+            [one, x, y, z],  # [0, 3, 5, 7]
+            [sq - 1],  # [1]
+            [3 * sq ** 2 - 6 * sq + 1],  # [2]
+            [(3 * sq - 5) * x, (3 * sq - 5) * y, (3 * sq - 5) * z],  # [4, 6, 8]
+            [3 * x ** 2 - sq, y ** 2 - z ** 2, x * y, y * z, x * z],  # [9, 11, 13, 14, 15]
+            [(2 * sq - 3) * (3 * x ** 2 - sq), (2 * sq - 3) * (y ** 2 - z ** 2)],  # [10, 12]
+            [(y ** 2 - z ** 2) * x, (z ** 2 - x ** 2) * y, (x ** 2 - y ** 2) * z]  # [16, 17, 18]
+        ]
+
+        # relaxation rate for first group of moments (1,x,y,z) is set to zero internally
+        relaxation_rates=[omegaBulk.center_vector, omegaBulk.center_vector, omegaMagic, omegaVisc, omegaVisc, omegaMagic]
+
+        methodWithForce = create_lb_method(stencil=stencil, method='mrt', maxwellian_moments=False,
+                                           force_model=forcemodel, nested_moments=moments, relaxation_rates=relaxation_rates)
+
+        #print(methodWithForce.relaxation_rates)
+        #print(methodWithForce.moment_matrix)
+        collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
+        generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
+ 
+    if generatedMethod == "D3Q27TRTlike":
+
+        omegaVisc = sp.Symbol("omega_visc")
+        omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
+        omegaMagic = sp.Symbol("omega_magic")
+        stencil = get_stencil("D3Q27", 'walberla')
+
+        relaxation_rates=[omegaVisc, omegaBulk.center_vector, omegaMagic, omegaVisc, omegaMagic, omegaVisc]
+
+        methodWithForce = create_lb_method(stencil=stencil, method='mrt', maxwellian_moments=False,weighted=True, compressible=False,
+                                           force_model=forcemodel, relaxation_rates=relaxation_rates)
+
+        collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
+        generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
+
+    if generatedMethod == "cumulant":
+
+        x,y,z = MOMENT_SYMBOLS
+
+        cumulants = [0] * 27
+
+        cumulants[0] = sp.sympify(1) #000
+
+        cumulants[1] = x #100
+        cumulants[2] = y #010
+        cumulants[3] = z #001
+
+        cumulants[4] = x*y #110
+        cumulants[5] = x*z #101
+        cumulants[6] = y*z #011
+
+        cumulants[7] = x**2 - y**2 #200 - 020
+        cumulants[8] = x**2 - z**2 #200 - 002
+        cumulants[9] = x**2 + y**2 + z**2 #200 + 020 + 002
+
+        cumulants[10] = x*y**2 + x*z**2 #120 + 102
+        cumulants[11] = x**2*y + y*z**2 #210 + 012
+        cumulants[12] = x**2*z + y**2*z #201 + 021
+        cumulants[13] = x*y**2 - x*z**2 #120 - 102
+        cumulants[14] = x**2*y - y*z**2 #210 - 012
+        cumulants[15] = x**2*z - y**2*z #201 - 021
+
+        cumulants[16] = x*y*z #111
+
+        cumulants[17] = x**2*y**2 - 2*x**2*z**2 + y**2*z**2 # 220- 2*202 +022
+        cumulants[18] = x**2*y**2 + x**2*z**2 - 2*y**2*z**2 # 220 + 202 - 2*002
+        cumulants[19] = x**2*y**2 + x**2*z**2 + y**2*z**2 # 220 + 202 + 022
+
+        cumulants[20] = x**2*y*z # 211
+        cumulants[21] = x*y**2*z # 121
+        cumulants[22] = x*y*z**2 # 112
+
+        cumulants[23] = x**2*y**2*z # 221
+        cumulants[24] = x**2*y*z**2 # 212
+        cumulants[25] = x*y**2*z**2 # 122
+
+        cumulants[26] = x**2*y**2*z**2 # 222
+
+        def get_relaxation_rate(cumulant, omega):
+            if get_order(cumulant) <= 1:
+                return 0
+            elif get_order(cumulant) == 2 and cumulant != x**2 + y**2 + z**2:
+                return omega
+            else:
+                return 1
+
+        stencil = get_stencil("D3Q27", 'walberla')
+
+        omega = sp.Symbol("omega")
+        rr_dict = OrderedDict((c, get_relaxation_rate(c, omega))
+                              for c in cumulants)
+
+        from lbmpy.methods import create_with_continuous_maxwellian_eq_moments
+        my_method = create_with_continuous_maxwellian_eq_moments(stencil, rr_dict, cumulant=True, compressible=True, force_model=forcemodel)
+
+        collision_rule = create_lb_collision_rule(lb_method=my_method,
+                                                  optimization={"cse_global": True,
+                                                                "cse_pdfs": False})
+
+        print(my_method.relaxation_rates)
+
+        generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
+
+
+    if generatedMethod == "cumulantTRT":
+
+        x,y,z = MOMENT_SYMBOLS
+
+        cumulants = [0] * 27
+
+        cumulants[0] = sp.sympify(1) #000
+
+        cumulants[1] = x #100
+        cumulants[2] = y #010
+        cumulants[3] = z #001
+
+        cumulants[4] = x*y #110
+        cumulants[5] = x*z #101
+        cumulants[6] = y*z #011
+
+        cumulants[7] = x**2 - y**2 #200 - 020
+        cumulants[8] = x**2 - z**2 #200 - 002
+        cumulants[9] = x**2 + y**2 + z**2 #200 + 020 + 002
+
+        cumulants[10] = x*y**2 + x*z**2 #120 + 102
+        cumulants[11] = x**2*y + y*z**2 #210 + 012
+        cumulants[12] = x**2*z + y**2*z #201 + 021
+        cumulants[13] = x*y**2 - x*z**2 #120 - 102
+        cumulants[14] = x**2*y - y*z**2 #210 - 012
+        cumulants[15] = x**2*z - y**2*z #201 - 021
+
+        cumulants[16] = x*y*z #111
+
+        cumulants[17] = x**2*y**2 - 2*x**2*z**2 + y**2*z**2 # 220- 2*202 +022
+        cumulants[18] = x**2*y**2 + x**2*z**2 - 2*y**2*z**2 # 220 + 202 - 2*002
+        cumulants[19] = x**2*y**2 + x**2*z**2 + y**2*z**2 # 220 + 202 + 022
+
+        cumulants[20] = x**2*y*z # 211
+        cumulants[21] = x*y**2*z # 121
+        cumulants[22] = x*y*z**2 # 112
+
+        cumulants[23] = x**2*y**2*z # 221
+        cumulants[24] = x**2*y*z**2 # 212
+        cumulants[25] = x*y**2*z**2 # 122
+
+        cumulants[26] = x**2*y**2*z**2 # 222
+
+        def get_relaxation_rate(cumulant, omegaVisc, omegaMagic):
+            if get_order(cumulant) <= 1:
+                return 0
+            elif is_even(cumulant):
+                return omegaVisc
+            else:
+                return omegaMagic
+
+        stencil = get_stencil("D3Q27", 'walberla')
+
+        omegaVisc = sp.Symbol("omega_visc")
+        omegaMagic = sp.Symbol("omega_magic")
+
+        rr_dict = OrderedDict((c, get_relaxation_rate(c, omegaVisc, omegaMagic))
+                              for c in cumulants)
+
+        from lbmpy.methods import create_with_continuous_maxwellian_eq_moments
+        my_method = create_with_continuous_maxwellian_eq_moments(stencil, rr_dict, cumulant=True, compressible=True, force_model=forcemodel)
+
+        collision_rule = create_lb_collision_rule(lb_method=my_method,
+                                                  optimization={"cse_global": True,
+                                                                "cse_pdfs": False})
+
+        print(my_method.relaxation_rates)
+
+        generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx', cpu_vectorize_info=cpu_vectorize_info)
+
diff --git a/apps/benchmarks/FluidParticleCoupling/LubricationForceEvaluation.cpp b/apps/benchmarks/FluidParticleCoupling/LubricationForceEvaluation.cpp
index dcb7737fe5c8809802e22a3bd5802fd8321a2369..e17092473fdbc3524286da109ec2b93552c71dd5 100644
--- a/apps/benchmarks/FluidParticleCoupling/LubricationForceEvaluation.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/LubricationForceEvaluation.cpp
@@ -52,6 +52,7 @@
 #include "lbm_mesapd_coupling/mapping/ParticleMapping.h"
 #include "lbm_mesapd_coupling/momentum_exchange_method/MovingParticleMapping.h"
 #include "lbm_mesapd_coupling/momentum_exchange_method/boundary/CurvedLinear.h"
+#include "lbm_mesapd_coupling/momentum_exchange_method/boundary/SimpleBB.h"
 #include "lbm_mesapd_coupling/utility/ParticleSelector.h"
 #include "lbm_mesapd_coupling/utility/ResetHydrodynamicForceTorqueKernel.h"
 #include "lbm_mesapd_coupling/utility/LubricationCorrectionKernel.h"
@@ -115,7 +116,8 @@ const uint_t FieldGhostLayers = 1;
 
 const FlagUID Fluid_Flag( "fluid" );
 const FlagUID FreeSlip_Flag( "free slip" );
-const FlagUID MO_Flag( "moving obstacle" );
+const FlagUID MO_SBB_Flag( "moving obstacle sbb" );
+const FlagUID MO_CLI_Flag( "moving obstacle cli" );
 const FlagUID FormerMO_Flag( "former moving obstacle" );
 
 /////////////////////////////////////
@@ -127,8 +129,9 @@ class MyBoundaryHandling
 public:
 
    using FreeSlip_T = lbm::FreeSlip< LatticeModel_T, FlagField_T>;
-   using MO_T = lbm_mesapd_coupling::CurvedLinear< LatticeModel_T, FlagField_T, ParticleAccessor_T >;
-   using Type = BoundaryHandling< FlagField_T, Stencil_T, FreeSlip_T, MO_T >;
+   using MO_SBB_T = lbm_mesapd_coupling::SimpleBB< LatticeModel_T, FlagField_T, ParticleAccessor_T >;
+   using MO_CLI_T = lbm_mesapd_coupling::CurvedLinear< LatticeModel_T, FlagField_T, ParticleAccessor_T >;
+   using Type = BoundaryHandling< FlagField_T, Stencil_T, FreeSlip_T, MO_SBB_T, MO_CLI_T >;
 
    MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID,
                        const BlockDataID & particleFieldID, const shared_ptr<ParticleAccessor_T>& ac) :
@@ -147,7 +150,8 @@ public:
 
       Type * handling = new Type( "moving obstacle boundary handling", flagField, fluid,
                                   FreeSlip_T( "FreeSlip", FreeSlip_Flag, pdfField, flagField, fluid ),
-                                  MO_T( "MO", MO_Flag, pdfField, flagField, particleField, ac_, fluid, *storage, *block ) );
+                                  MO_SBB_T( "SBB", MO_SBB_Flag, pdfField, flagField, particleField, ac_, fluid, *storage, *block ),
+                                  MO_CLI_T( "CLI", MO_CLI_Flag, pdfField, flagField, particleField, ac_, fluid, *storage, *block )  );
 
       const auto freeslip = flagField->getFlag( FreeSlip_Flag );
 
@@ -237,6 +241,7 @@ int main( int argc, char **argv )
    real_t magicNumber = real_t(3)/real_t(16);
    bool useOmegaBulkAdaption = false;
    real_t adaptionLayerSize = real_t(2);
+   bool useSBB = false;
 
    // 1: translation in normal direction -> normal Lubrication force
    // 2: translation in tangential direction -> tangential Lubrication force and torque
@@ -246,19 +251,20 @@ int main( int argc, char **argv )
 
    for( int i = 1; i < argc; ++i )
    {
-      if( std::strcmp( argv[i], "--sphWallTest" )               == 0 ) { sphSphTest = false; continue; }
-      if( std::strcmp( argv[i], "--noLogging" )                 == 0 ) { fileIO = false; continue; }
-      if( std::strcmp( argv[i], "--vtkIOFreq" )                 == 0 ) { vtkIOFreq = uint_c( std::atof( argv[++i] ) ); continue; }
-      if( std::strcmp( argv[i], "--setup" )                 == 0 ) { setup = uint_c( std::atof( argv[++i] ) ); continue; }
-      if( std::strcmp( argv[i], "--baseFolder" )                == 0 ) { baseFolder = argv[++i]; continue; }
-      if( std::strcmp( argv[i], "--diameter" )                  == 0 ) { radius = real_t(0.5)*real_c(std::atof(argv[++i])); continue; }
+      if( std::strcmp( argv[i], "--sphWallTest" )          == 0 ) { sphSphTest = false; continue; }
+      if( std::strcmp( argv[i], "--noLogging" )            == 0 ) { fileIO = false; continue; }
+      if( std::strcmp( argv[i], "--vtkIOFreq" )            == 0 ) { vtkIOFreq = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--setup" )                == 0 ) { setup = uint_c( std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--baseFolder" )           == 0 ) { baseFolder = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--diameter" )             == 0 ) { radius = real_t(0.5)*real_c(std::atof(argv[++i])); continue; }
       if( std::strcmp( argv[i], "--gapSize" )              == 0 ) { gapSize = real_c(std::atof(argv[++i])); continue; }
-      if( std::strcmp( argv[i], "--bulkViscRateFactor" )        == 0 ) { bulkViscRateFactor = real_c(std::atof( argv[++i] ) ); continue; }
+      if( std::strcmp( argv[i], "--bulkViscRateFactor" )   == 0 ) { bulkViscRateFactor = real_c(std::atof( argv[++i] ) ); continue; }
       if( std::strcmp( argv[i], "--tau" )                  == 0 ) { tau = real_c(std::atof( argv[++i] ) ); continue; }
-      if( std::strcmp( argv[i], "--fileName" )                  == 0 ) { fileNameEnding = argv[++i]; continue; }
-      if( std::strcmp( argv[i], "--magicNumber" )               == 0 ) { magicNumber = real_c(std::atof(argv[++i])); continue; }
-      if( std::strcmp( argv[i], "--useOmegaBulkAdaption" )      == 0 ) { useOmegaBulkAdaption = true; continue; }
-      if( std::strcmp( argv[i], "--adaptionLayerSize" )         == 0 ) { adaptionLayerSize = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--fileName" )             == 0 ) { fileNameEnding = argv[++i]; continue; }
+      if( std::strcmp( argv[i], "--magicNumber" )          == 0 ) { magicNumber = real_c(std::atof(argv[++i])); continue; }
+      if( std::strcmp( argv[i], "--useOmegaBulkAdaption" ) == 0 ) { useOmegaBulkAdaption = true; continue; }
+      if( std::strcmp( argv[i], "--adaptionLayerSize" )    == 0 ) { adaptionLayerSize = real_c(std::atof( argv[++i] )); continue; }
+      if( std::strcmp( argv[i], "--useSBB" )               == 0 ) { useSBB = true; continue; }
       WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
    }
 
@@ -315,6 +321,7 @@ int main( int argc, char **argv )
          << " time steps   = " << timesteps << "\n"
          << " fStokes      = " << fStokes << "\n"
          << " setup        = " << setup << "\n"
+         << " useSBB       = " << useSBB << "\n"
          << "-------------------------------------------------------\n"
          << " domainSize = " << xSize << " x " << ySize << " x " << zSize  << "\n"
          << " blocks     = " << xBlocks << " x " << yBlocks << " x " << zBlocks  << "\n"
@@ -374,7 +381,7 @@ int main( int argc, char **argv )
          } else if (setup == 2)
          {
             // only tangential translational velocity
-            p.setLinearVelocity(Vector3<real_t>( real_t(0), velocity, real_t(0)));
+            p.setLinearVelocity(Vector3<real_t>( real_t(0), real_t(0), velocity));
             p.setAngularVelocity(Vector3<real_t>( real_t(0), real_t(0), real_t(0)));
          } else if (setup == 3)
          {
@@ -406,7 +413,7 @@ int main( int argc, char **argv )
          } else if (setup == 2)
          {
             // only tangential translational velocity
-            p.setLinearVelocity(Vector3<real_t>( real_t(0), -velocity, real_t(0)));
+            p.setLinearVelocity(Vector3<real_t>( real_t(0), real_t(0), -velocity));
             p.setAngularVelocity(Vector3<real_t>( real_t(0), real_t(0), real_t(0)));
          } else if (setup == 3)
          {
@@ -493,7 +500,7 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx);
 
    // create the lattice model
    real_t lambda_e = lbm::collision_model::TRT::lambda_e( omega );
@@ -546,7 +553,13 @@ int main( int argc, char **argv )
    ///////////////
 
    // map particles into the LBM simulation
-   ps->forEachParticle(false, mesa_pd::kernel::SelectAll(), *accessor, movingParticleMappingKernel, *accessor, MO_Flag);
+   if(useSBB)
+   {
+      ps->forEachParticle(false, mesa_pd::kernel::SelectAll(), *accessor, movingParticleMappingKernel, *accessor, MO_SBB_Flag);
+   } else
+   {
+      ps->forEachParticle(false, mesa_pd::kernel::SelectAll(), *accessor, movingParticleMappingKernel, *accessor, MO_CLI_Flag);
+   }
 
    // create the timeloop
    SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps );
@@ -637,7 +650,7 @@ int main( int argc, char **argv )
    real_t curTorqueNorm = real_t(0);
    real_t oldTorqueNorm = real_t(0);
 
-   real_t convergenceLimit = real_t(1e-4);
+   real_t convergenceLimit = real_t(1e-5);
 
    // time loop
    for (uint_t i = 1; i <= timesteps; ++i )
@@ -747,6 +760,7 @@ int main( int argc, char **argv )
       loggingFileName += "_bvrf" + std::to_string(uint_c(bulkViscRateFactor));
       loggingFileName += "_mn" + std::to_string(float(magicNumber));
       if( useOmegaBulkAdaption ) loggingFileName += "_uOBA" + std::to_string(uint_c(adaptionLayerSize));
+      if( useSBB ) loggingFileName += "_SBB";
       if( !fileNameEnding.empty()) loggingFileName += "_" + fileNameEnding;
       loggingFileName += ".txt";
 
diff --git a/apps/benchmarks/FluidParticleCoupling/ObliqueWetCollision.cpp b/apps/benchmarks/FluidParticleCoupling/ObliqueWetCollision.cpp
index cac87d6c75e0fb971a8cfb5ed26901a333fef5fc..3bb0c57eef6b1138797cef785afe47869abb3f0f 100644
--- a/apps/benchmarks/FluidParticleCoupling/ObliqueWetCollision.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/ObliqueWetCollision.cpp
@@ -815,7 +815,7 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx);
 
    // create the lattice model
    real_t lambda_e = lbm::collision_model::TRT::lambda_e( omega );
diff --git a/apps/benchmarks/FluidParticleCoupling/SettlingSphereInBox.cpp b/apps/benchmarks/FluidParticleCoupling/SettlingSphereInBox.cpp
index f1d6ac21e643f97fc1485678217c29551aa05266..3f2f613b252ece2fc72ad5c3df257e3c2f2f5223 100644
--- a/apps/benchmarks/FluidParticleCoupling/SettlingSphereInBox.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/SettlingSphereInBox.cpp
@@ -186,9 +186,10 @@ class SpherePropertyLogger
 public:
    SpherePropertyLogger( const shared_ptr< ParticleAccessor_T > & ac, walberla::id_t sphereUid,
          const std::string & fileName, bool fileIO,
-         real_t dx_SI, real_t dt_SI, real_t diameter, real_t gravitationalForceMag) :
+         real_t dx_SI, real_t dt_SI, real_t diameter, real_t gravitationalForceMag, real_t uref) :
       ac_( ac ), sphereUid_( sphereUid ), fileName_( fileName ), fileIO_(fileIO),
-      dx_SI_( dx_SI ), dt_SI_( dt_SI ), diameter_( diameter ), gravitationalForceMag_( gravitationalForceMag ),
+      dx_SI_( dx_SI ), dt_SI_( dt_SI ), diameter_( diameter ),
+      gravitationalForceMag_( gravitationalForceMag ), uref_(uref), tref_(diameter / uref),
       position_( real_t(0) ), maxVelocity_( real_t(0) )
    {
       if ( fileIO_ )
@@ -197,7 +198,7 @@ public:
          {
             std::ofstream file;
             file.open( fileName_.c_str() );
-            file << "#\t t\t posZ\t gapZ/D\t velZ\t velZ_SI\t fZ\t fZ/fGravi\n";
+            file << "#\t t\t t/tref\t posZ\t gapZ/D\t velZ\t velZ_SI\t velZ/uref\t fZ\t fZ/fGravi\n";
             file.close();
          }
       }
@@ -264,10 +265,9 @@ private:
          auto velocity_SI = velocity * dx_SI_ / dt_SI_;
          auto normalizedHydForce = hydForce / gravitationalForceMag_;
 
-
-         file << timestep << "\t" << real_c(timestep) * dt_SI_ << "\t"
+         file << timestep << "\t" << real_c(timestep) * dt_SI_ << "\t" << real_c(timestep) / tref_
               << "\t" << position[2] << "\t" << scaledPosition[2] - real_t(0.5)
-              << "\t" << velocity[2] << "\t" << velocity_SI[2]
+              << "\t" << velocity[2] << "\t" << velocity_SI[2] << "\t" << velocity[2] / uref_
               << "\t" << hydForce[2] << "\t" << normalizedHydForce[2]
               << "\n";
          file.close();
@@ -278,7 +278,7 @@ private:
    const walberla::id_t sphereUid_;
    std::string fileName_;
    bool fileIO_;
-   real_t dx_SI_, dt_SI_, diameter_, gravitationalForceMag_;
+   real_t dx_SI_, dt_SI_, diameter_, gravitationalForceMag_, uref_, tref_;
 
    real_t position_;
    real_t maxVelocity_;
@@ -385,6 +385,8 @@ int main( int argc, char **argv )
    real_t adaptionLayerSize = real_t(2);
    bool useLubricationCorrection = true;
 
+   bool useGalileoParameterization = false;
+
    for( int i = 1; i < argc; ++i )
    {
       if( std::strcmp( argv[i], "--shortrun" )             == 0 ) { shortrun = true; continue; }
@@ -406,6 +408,7 @@ int main( int argc, char **argv )
       if( std::strcmp( argv[i], "--useOmegaBulkAdaption" ) == 0 ) { useOmegaBulkAdaption = true; continue; }
       if( std::strcmp( argv[i], "--adaptionLayerSize" )    == 0 ) { adaptionLayerSize = real_c(std::atof( argv[++i] )); continue; }
       if( std::strcmp( argv[i], "--noLubricationCorrection" ) == 0 ) { useLubricationCorrection = false; continue; }
+      if( std::strcmp( argv[i], "--useGalileoParameterization" ) == 0 ) { useGalileoParameterization = true; continue; }
       WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]);
    }
 
@@ -433,6 +436,9 @@ int main( int argc, char **argv )
    real_t densityFluid_SI;
    real_t dynamicViscosityFluid_SI;
    real_t expectedSettlingVelocity_SI;
+
+   // expected velocity given as uMax in experiments of ten Cate (Table 2, E1-E4), with uInfty from Table 1
+   // > slightly different Re than in ten Cate's Table 1
    switch( fluidType )
    {
       case 1:
@@ -494,8 +500,9 @@ int main( int argc, char **argv )
    const real_t diameter = diameter_SI / dx_SI;
    const real_t sphereVolume = math::pi / real_t(6) * diameter * diameter * diameter;
 
-   //const real_t dt_SI = characteristicVelocity / ug_SI * dx_SI; // this uses Ga for parameterization
-   const real_t dt_SI = characteristicVelocity / expectedSettlingVelocity_SI * dx_SI; // this uses Re for parameterization (only possible since settling velocity is known)
+
+   const real_t dt_SI = (useGalileoParameterization) ? characteristicVelocity / ug_SI * dx_SI : // this uses Ga for parameterization, where ug is the characteristic velocity
+                                                       characteristicVelocity / expectedSettlingVelocity_SI * dx_SI; // this uses Re for parameterization (only possible since settling velocity is known)
 
    const real_t viscosity =  kinematicViscosityFluid_SI * dt_SI / ( dx_SI * dx_SI );
    const real_t omega = lbm::collision_model::omegaFromViscosity(viscosity);
@@ -602,7 +609,7 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx);
 
    // create the lattice model
    real_t lambda_e = lbm::collision_model::TRT::lambda_e( omega );
@@ -779,17 +786,19 @@ int main( int argc, char **argv )
    // evaluation functionality
    std::string loggingFileName( baseFolder + "/LoggingSettlingSphere_");
    loggingFileName += std::to_string(fluidType);
+   loggingFileName += "_res" + std::to_string(numberOfCellsInHorizontalDirection);
    loggingFileName += "_recon" + reconstructorType;
    loggingFileName += "_bvrf" + std::to_string(uint_c(bulkViscRateFactor));
    loggingFileName += "_mn" + std::to_string(float(magicNumber));
    if( useOmegaBulkAdaption ) loggingFileName += "_uOBA" + std::to_string(uint_c(adaptionLayerSize));
+   if( useGalileoParameterization ) loggingFileName += "_Ga";
    if( !fileNameEnding.empty()) loggingFileName += "_" + fileNameEnding;
    loggingFileName += ".txt";
    if( fileIO  )
    {
       WALBERLA_LOG_INFO_ON_ROOT(" - writing logging output to file \"" << loggingFileName << "\"");
    }
-   SpherePropertyLogger<ParticleAccessor_T> logger( accessor, sphereUid, loggingFileName, fileIO, dx_SI, dt_SI, diameter, -gravitationalForce[2] );
+   SpherePropertyLogger<ParticleAccessor_T> logger( accessor, sphereUid, loggingFileName, fileIO, dx_SI, dt_SI, diameter, -gravitationalForce[2], characteristicVelocity );
 
 
    ////////////////////////
@@ -832,7 +841,7 @@ int main( int argc, char **argv )
             syncCall();
          }
 
-         ps->forEachParticle(useOpenMP, sphereSelector, *accessor, addHydrodynamicInteraction, *accessor );
+         ps->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor, addHydrodynamicInteraction, *accessor );
          ps->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor, addGravitationalForce, *accessor );
 
          // lubrication correction
diff --git a/apps/benchmarks/FluidParticleCoupling/SphereMovingWithPrescribedVelocity.cpp b/apps/benchmarks/FluidParticleCoupling/SphereMovingWithPrescribedVelocity.cpp
index 0e0263ff5042bda995728eac25dfe71ea4efa9b7..9c963fa9dcbc0580af7ce7476d92c0884bbaec27 100644
--- a/apps/benchmarks/FluidParticleCoupling/SphereMovingWithPrescribedVelocity.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/SphereMovingWithPrescribedVelocity.cpp
@@ -655,7 +655,7 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx );
 
    // create the lattice model
    real_t lambda_e = lbm::collision_model::TRT::lambda_e( omega );
diff --git a/apps/benchmarks/FluidParticleCoupling/SphereWallCollision.cpp b/apps/benchmarks/FluidParticleCoupling/SphereWallCollision.cpp
index cd84f67e4d0ad40ba6813cfc6716b195d0475558..1ce306d76a368929dce44e0e52fb73e7753bab7f 100644
--- a/apps/benchmarks/FluidParticleCoupling/SphereWallCollision.cpp
+++ b/apps/benchmarks/FluidParticleCoupling/SphereWallCollision.cpp
@@ -132,6 +132,10 @@ using ScalarField_T = GhostLayerField< real_t, 1>;
 
 const uint_t FieldGhostLayers = 1;
 
+#define USE_TRTLIKE
+//#define USE_D3Q27TRTLIKE
+//#define USE_CUMULANT
+
 ///////////
 // FLAGS //
 ///////////
@@ -758,10 +762,12 @@ int main( int argc, char **argv )
                                     + "_mn" + std::to_string(magicNumber);
    if(useOmegaBulkAdaption) checkPointFileName +="_omegaBulkAdaption";
    if(applyOutflowBCAtTop) checkPointFileName +="_outflowBC";
-#ifdef USE_TRT_LIKE_LATTICE_MODEL
-   checkPointFileName +="_TRTlike";
-#else
-   checkPointFileName += "_other";
+#if defined(USE_TRTLIKE)
+   checkPointFileName += "_trtlike";
+#elif defined(USE_CUMULANT)
+   checkPointFileName += "_cumulant";
+#elif defined(USE_D3Q27TRTLIKE)
+   checkPointFileName += "_d3q27trtlike";
 #endif
 
    WALBERLA_LOG_INFO_ON_ROOT("Checkpointing:");
@@ -852,10 +858,10 @@ int main( int argc, char **argv )
    {
       WALBERLA_LOG_INFO_ON_ROOT( "Initializing particles from checkpointing file!" );
       particleStorageID = blocks->loadBlockData( checkPointFileName + "_mesa.txt", mesa_pd::domain::createBlockForestDataHandling(ps), "Particle Storage" );
-   } else {
-      particleStorageID = blocks->addBlockData(mesa_pd::domain::createBlockForestDataHandling(ps), "Particle Storage");
       mesa_pd::mpi::ClearNextNeighborSync CNNS;
       CNNS(*accessor);
+   } else {
+      particleStorageID = blocks->addBlockData(mesa_pd::domain::createBlockForestDataHandling(ps), "Particle Storage");
    }
 
    // bounding planes
@@ -901,14 +907,19 @@ int main( int argc, char **argv )
    ////////////////////////
 
    // add omega bulk field
-   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx, uint_t(0) );
+   BlockDataID omegaBulkFieldID = field::addToStorage<ScalarField_T>( blocks, "omega bulk field", omegaBulk, field::fzyx);
 
    // create the lattice model
    real_t lambda_e = lbm::collision_model::TRT::lambda_e( omega );
    real_t lambda_d = lbm::collision_model::TRT::lambda_d( omega, magicNumber );
 #ifdef WALBERLA_BUILD_WITH_CODEGEN
+#ifdef USE_CUMULANT
+   WALBERLA_LOG_INFO_ON_ROOT("Using generated cumulant lattice model!");
+   LatticeModel_T latticeModel = LatticeModel_T(omega);
+#elif defined(USE_TRTLIKE) || defined(USE_D3Q27TRTLIKE)
    WALBERLA_LOG_INFO_ON_ROOT("Using generated TRT-like lattice model!");
    LatticeModel_T latticeModel = LatticeModel_T(omegaBulkFieldID, lambda_d, lambda_e);
+#endif
 #else
    WALBERLA_LOG_INFO_ON_ROOT("Using waLBerla built-in MRT lattice model and ignoring omega bulk field since not supported!");
    LatticeModel_T latticeModel = LatticeModel_T(lbm::collision_model::D3Q19MRT( omegaBulk, omegaBulk, lambda_d, lambda_e, lambda_e, lambda_d ));
@@ -1117,11 +1128,7 @@ int main( int argc, char **argv )
    }else if( reconstructorType == "Ext")
    {
       auto sphereNormalExtrapolationDirectionFinder = make_shared<lbm_mesapd_coupling::SphereNormalExtrapolationDirectionFinder>(blocks);
-#ifdef USE_TRT_LIKE_LATTICE_MODEL
-      auto extrapolationReconstructor = lbm_mesapd_coupling::makeExtrapolationReconstructor<BoundaryHandling_T, lbm_mesapd_coupling::SphereNormalExtrapolationDirectionFinder, true>(blocks, boundaryHandlingID, sphereNormalExtrapolationDirectionFinder, uint_t(3), true);
-#else
       auto extrapolationReconstructor = lbm_mesapd_coupling::makeExtrapolationReconstructor<BoundaryHandling_T, lbm_mesapd_coupling::SphereNormalExtrapolationDirectionFinder, false>(blocks, boundaryHandlingID, sphereNormalExtrapolationDirectionFinder, uint_t(3), true);
-#endif
       timeloopAfterParticles.add() << BeforeFunction( fullPDFCommunicationScheme, "LBM Communication" )
                                    << Sweep( makeSharedSweep(lbm_mesapd_coupling::makePdfReconstructionManager<PdfField_T,BoundaryHandling_T>(blocks, pdfFieldID, boundaryHandlingID, particleFieldID, accessor, FormerMO_Flag, Fluid_Flag, extrapolationReconstructor, conserveMomentum)), "PDF Restore" );
    } else
@@ -1308,9 +1315,8 @@ int main( int argc, char **argv )
          }
          else
          {
-            lbm_mesapd_coupling::RegularParticlesSelector sphereSelector;
             lbm_mesapd_coupling::AddHydrodynamicInteractionKernel addHydrodynamicInteraction;
-            ps->forEachParticle(useOpenMP, sphereSelector, *accessor, addHydrodynamicInteraction, *accessor );
+            ps->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *accessor, addHydrodynamicInteraction, *accessor );
          }
 
          hydForce = getForce(sphereUid,*accessor) - lubForce - collisionForce;