diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
index 74d5e5c714ff810f60a454f7062c230635e928c2..9372c0b5275d6057ea188cc5667883a5d8e30d30 100644
--- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
@@ -1,8 +1,14 @@
 
 waLBerla_link_files_to_builddir( "*.prm" )
 
+waLBerla_python_file_generates(UniformGridGPU.py  # NOTE(review): the files listed below are presumably the outputs of this script - confirm against the macro definition
+        UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
+        UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
+        UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
+        UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
+        UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
+        )
+
 waLBerla_add_executable ( NAME UniformGridBenchmarkGPU
-                          FILES UniformGridGPU.cpp UniformGridGPU_LatticeModel.cpp
-                                UniformGridGPU_LbKernel.cu UniformGridGPU_NoSlip.cu UniformGridGPU_UBB.cu
-                                UniformGridGPU_PackInfo.cu
+                          FILES UniformGridGPU.cpp UniformGridGPU.py
                           DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk )
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.gen.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.gen.py
deleted file mode 100644
index 731897463f30a8b9d8d005ec64f0dce36e954321..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.gen.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import sympy as sp
-from lbmpy_walberla import generate_lattice_model_files
-from lbmpy.creationfunctions import create_lb_update_rule
-from pystencils_walberla.sweep import Sweep
-from lbmpy.boundaries import NoSlip, UBB
-from lbmpy.creationfunctions import create_lb_method
-from lbmpy_walberla.boundary import create_boundary_class
-from pystencils_walberla.cmake_integration import codegen
-
-
-dtype = 'float64'
-
-# LB options
-options = {
-    'method': 'srt',
-    'stencil': 'D3Q19',
-    'relaxation_rate': sp.Symbol("omega"),
-    'field_name': 'pdfs',
-    'compressible': False,
-    'temporary_field_name': 'pdfs_tmp',
-    'optimization': {'cse_global': True,
-                     'cse_pdfs': True,
-                     'double_precision': dtype == 'float64'}
-}
-
-# GPU optimization options
-inner_opt = {'gpu_indexing_params': {'block_size': (128, 1, 1)},  'data_type': dtype}
-outer_opt = {'gpu_indexing_params': {'block_size': (32, 32, 32)}, 'data_type': dtype}
-
-
-def lb_assignments():
-    ur = create_lb_update_rule(**options)
-    return ur.all_assignments
-
-
-def genBoundary():
-    boundary = UBB([0.05, 0, 0], dim=3, name="UniformGridGPU_UBB")
-    return create_boundary_class(boundary, create_lb_method(**options), target='gpu')
-
-
-def genNoSlip():
-    boundary = NoSlip(name='UniformGridGPU_NoSlip')
-    return create_boundary_class(boundary, create_lb_method(**options), target='gpu')
-
-
-generate_lattice_model_files(class_name='UniformGridGPU_LatticeModel', **options)
-
-Sweep.generate_inner_outer_kernel('UniformGridGPU_LbKernel',
-                                  lambda: create_lb_update_rule(**options).all_assignments,
-                                  target='gpu',
-                                  temporary_fields=['pdfs_tmp'],
-                                  field_swaps=[('pdfs', 'pdfs_tmp')],
-                                  optimization=inner_opt,
-                                  outer_optimization=outer_opt)
-
-Sweep.generate_pack_info('UniformGridGPU_PackInfo', lb_assignments, target='gpu')
-
-codegen.register(['UniformGridGPU_UBB.h', 'UniformGridGPU_UBB.cu'], genBoundary)
-codegen.register(['UniformGridGPU_NoSlip.h', 'UniformGridGPU_NoSlip.cu'], genNoSlip)
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4619226eabd2da6ce38bf233d67e91e5bf4ccaa
--- /dev/null
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -0,0 +1,35 @@
+import sympy as sp
+from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
+from lbmpy.boundaries import NoSlip, UBB
+from pystencils_walberla import generate_pack_info_from_kernel
+from lbmpy_walberla import generate_lattice_model, generate_boundary
+from pystencils_walberla import CodeGeneration, generate_sweep
+
+
+with CodeGeneration() as ctx:  # ctx is passed as first argument to every generate_* call below
+    # LB options: keyword arguments shared by create_lb_method and create_lb_update_rule
+    options = {
+        'method': 'srt',
+        'stencil': 'D3Q19',
+        'relaxation_rate': sp.Symbol("omega"),  # kept symbolic instead of a fixed number
+        'field_name': 'pdfs',
+        'compressible': False,
+        'temporary_field_name': 'pdfs_tmp',  # same name appears in field_swaps of the sweep below
+        'optimization': {'cse_global': True,
+                         'cse_pdfs': True,
+                         'gpu_indexing_params': {'block_size': (128, 1, 1)}}
+    }
+    lb_method = create_lb_method(**options)
+    update_rule = create_lb_update_rule(lb_method=lb_method, **options)  # reuses the lb_method created above
+
+    # CPU lattice model - required for macroscopic value computation, VTK output etc.
+    generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', lb_method)
+
+    # GPU LB sweep & boundaries (every call below is made with target='gpu')
+    generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')],
+                   inner_outer_split=True, target='gpu')
+    generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
+    generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')  # NOTE(review): [0.05, 0, 0] is presumably the boundary velocity - confirm
+
+    # communication: pack info is generated from the same update_rule as the LB sweep
+    generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPUSmall.prm b/apps/benchmarks/UniformGridGPU/UniformGridGPUSmall.prm
deleted file mode 100644
index c6b8ae931524c474bcc60bce1711e69f011f6a53..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPUSmall.prm
+++ /dev/null
@@ -1,27 +0,0 @@
-
-Parameters 
-{
-	omega           1.8;
-	timesteps       2;
-
-	remainingTimeLoggerFrequency 3;
-	vtkWriteFrequency 0;
-
-	overlapCommunication false;
-	cudaEnabledMPI false;
-}
-
-DomainSetup
-{
-   blocks        <  1,    1, 1 >;
-   cellsPerBlock <  50, 20, 10 >;
-   periodic      <  0,    0, 1 >;  
-}
-
-Boundaries 
-{
-	Border { direction W;    walldistance -1;  flag NoSlip; }
-	Border { direction E;    walldistance -1;  flag NoSlip; }
-    Border { direction S;    walldistance -1;  flag NoSlip; }
-    Border { direction N;    walldistance -1;  flag UBB; }
-}
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
deleted file mode 100644
index 20712a5bc6e37826a12e6e8f1fb011a08df43a13..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
+++ /dev/null
@@ -1,594 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\author Martin Bauer <martin.bauer@fau.de>
-//======================================================================================================================
-
-#include <cmath>
-
-#include "core/DataTypes.h"
-#include "core/Macros.h"
-#include "lbm/field/PdfField.h"
-#include "lbm/sweeps/Streaming.h"
-#include "UniformGridGPU_LatticeModel.h"
-
-#ifdef _MSC_VER
-#  pragma warning( disable : 4458 )
-#endif
-
-#define FUNC_PREFIX
-
-#ifdef WALBERLA_CXX_COMPILER_IS_GNU
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-variable"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-#pragma GCC diagnostic ignored "-Wshadow"
-#endif
-
-#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-variable"
-#pragma clang diagnostic ignored "-Wunused-parameter"
-#pragma clang diagnostic ignored "-Wshadow"
-#endif
-
-
-using namespace std;
-
-namespace walberla {
-namespace lbm {
-
-namespace internal_kernel_streamCollide {
-static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double * _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double omega)
-{
-   const double xi_1 = omega*0.166666666666667;
-   const double xi_5 = omega*0.0416666666666667;
-   for (int ctr_2 = 1; ctr_2 < _size_pdfs_2 - 1; ctr_2 += 1)
-   {
-      double * const _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * const _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * const _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * const _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * const _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * const _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * const _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * const _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * const _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * const _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * const _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * const _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * const _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * const _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * const _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * const _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * const _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * const _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * const _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2;
-      double * _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      for (int ctr_1 = 1; ctr_1 < _size_pdfs_1 - 1; ctr_1 += 1)
-      {
-         double * const _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * const _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * const _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * const _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * const _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * const _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * const _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         double * const _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * const _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * const _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * const _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * const _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * const _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * const _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * const _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * const _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * const _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * const _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * const _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * _data_pdfs_tmp_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * _data_pdfs_tmp_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * _data_pdfs_tmp_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * _data_pdfs_tmp_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * _data_pdfs_tmp_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * _data_pdfs_tmp_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * _data_pdfs_tmp_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * _data_pdfs_tmp_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * _data_pdfs_tmp_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * _data_pdfs_tmp_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * _data_pdfs_tmp_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * _data_pdfs_tmp_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * _data_pdfs_tmp_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * _data_pdfs_tmp_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * _data_pdfs_tmp_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * _data_pdfs_tmp_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * _data_pdfs_tmp_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * _data_pdfs_tmp_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * _data_pdfs_tmp_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_tmp_20_318;
-         for (int ctr_0 = 1; ctr_0 < _size_pdfs_0 - 1; ctr_0 += 1)
-         {
-            const double xi_18 = -_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double xi_19 = -_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double xi_20 = -_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            const double u_0 = vel0Term + xi_18 + xi_19 - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            const double xi_23 = (u_0*u_0);
-            const double u_1 = vel1Term + xi_19 + xi_20 - _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            const double xi_21 = -u_1;
-            const double xi_24 = (u_1*u_1);
-            const double u_2 = vel2Term + xi_18 + xi_20 - _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            const double xi_22 = -u_2;
-            const double xi_25 = (u_2*u_2);
-            const double u0Mu1 = u_0 + xi_21;
-            const double u0Pu1 = u_0 + u_1;
-            const double u1Pu2 = u_1 + u_2;
-            const double u1Mu2 = u_1 + xi_22;
-            const double u0Mu2 = u_0 + xi_22;
-            const double u0Pu2 = u_0 + u_2;
-            const double f_eq_common = rho - xi_23 - xi_24 - xi_25;
-            const double xi_26 = f_eq_common + rho*-0.666666666666667;
-            const double xi_27 = f_eq_common + rho*-0.333333333333333;
-            const double xi_28 = xi_25 + xi_27;
-            const double xi_29 = xi_23 + xi_27;
-            const double xi_30 = xi_24 + xi_27;
-            const double xi_2 = xi_24*2 + xi_26;
-            const double xi_3 = xi_23*2 + xi_26;
-            const double xi_4 = xi_25*2 + xi_26;
-            const double xi_6 = u0Mu1*2;
-            const double xi_7 = (u0Mu1*u0Mu1)*3 + xi_28;
-            const double xi_8 = u0Pu1*2;
-            const double xi_9 = (u0Pu1*u0Pu1)*3 + xi_28;
-            const double xi_10 = u1Pu2*2;
-            const double xi_11 = (u1Pu2*u1Pu2)*3 + xi_29;
-            const double xi_12 = u1Mu2*2;
-            const double xi_13 = (u1Mu2*u1Mu2)*3 + xi_29;
-            const double xi_14 = u0Mu2*2;
-            const double xi_15 = (u0Mu2*u0Mu2)*3 + xi_30;
-            const double xi_16 = u0Pu2*2;
-            const double xi_17 = (u0Pu2*u0Pu2)*3 + xi_30;
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.333333333333333 - _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_0*ctr_0] = xi_1*(u_1 + xi_2 - 6*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_0*ctr_0] = xi_1*(xi_2 + xi_21 - 6*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_0*ctr_0] = xi_1*(-u_0 + xi_3 - 6*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_0*ctr_0] = xi_1*(u_0 + xi_3 - 6*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_0*ctr_0] = xi_1*(u_2 + xi_4 - 6*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_0*ctr_0] = xi_1*(xi_22 + xi_4 - 6*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_6 + xi_7 - 24*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_8 + xi_9 - 24*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_8 + xi_9 - 24*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_6 + xi_7 - 24*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_10 + xi_11 - 24*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_12 + xi_13 - 24*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_14 + xi_15 - 24*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_16 + xi_17 - 24*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_12 + xi_13 - 24*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_10 + xi_11 - 24*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_16 + xi_17 - 24*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_14 + xi_15 - 24*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-         }
-      }
-   }
-}
-}
-namespace internal_kernel_collide {
-static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double omega)
-{
-   const double xi_1 = omega*0.166666666666667;
-   const double xi_5 = omega*0.0416666666666667;
-   for (int ctr_2 = 1; ctr_2 < _size_pdfs_2 - 1; ctr_2 += 1)
-   {
-      double * _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      double * _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      double * _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      double * _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      double * _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      double * _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      double * _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      double * _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      double * _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      double * _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      for (int ctr_1 = 1; ctr_1 < _size_pdfs_1 - 1; ctr_1 += 1)
-      {
-         double * _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
-         double * _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
-         double * _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
-         double * _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
-         double * _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
-         double * _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
-         double * _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
-         double * _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
-         double * _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
-         double * _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
-         double * _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
-         double * _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
-         double * _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
-         double * _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
-         double * _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
-         double * _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
-         for (int ctr_0 = 1; ctr_0 < _size_pdfs_0 - 1; ctr_0 += 1)
-         {
-            const double Dummy_18 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_19 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_20 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_21 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_22 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_23 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_24 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_25 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_26 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_27 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_28 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_29 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_30 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_31 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_32 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_33 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_34 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_35 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            const double Dummy_36 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
-            const double xi_18 = -Dummy_22;
-            const double xi_19 = -Dummy_26;
-            const double xi_20 = -Dummy_24;
-            const double vel0Term = Dummy_20 + Dummy_21 + Dummy_30 + Dummy_31 + Dummy_34;
-            const double vel1Term = Dummy_19 + Dummy_23 + Dummy_25 + Dummy_29;
-            const double vel2Term = Dummy_18 + Dummy_28 + Dummy_32;
-            const double rho = Dummy_22 + Dummy_24 + Dummy_26 + Dummy_27 + Dummy_33 + Dummy_35 + Dummy_36 + vel0Term + vel1Term + vel2Term;
-            const double u_0 = -Dummy_28 - Dummy_29 - Dummy_33 + vel0Term + xi_18 + xi_19;
-            const double xi_23 = (u_0*u_0);
-            const double u_1 = -Dummy_18 - Dummy_27 - Dummy_30 + Dummy_34 + vel1Term + xi_19 + xi_20;
-            const double xi_21 = -u_1;
-            const double xi_24 = (u_1*u_1);
-            const double u_2 = Dummy_20 - Dummy_21 - Dummy_23 + Dummy_25 - Dummy_36 + vel2Term + xi_18 + xi_20;
-            const double xi_22 = -u_2;
-            const double xi_25 = (u_2*u_2);
-            const double u0Mu1 = u_0 + xi_21;
-            const double u0Pu1 = u_0 + u_1;
-            const double u1Pu2 = u_1 + u_2;
-            const double u1Mu2 = u_1 + xi_22;
-            const double u0Mu2 = u_0 + xi_22;
-            const double u0Pu2 = u_0 + u_2;
-            const double f_eq_common = rho - xi_23 - xi_24 - xi_25;
-            const double xi_26 = f_eq_common + rho*-0.666666666666667;
-            const double xi_27 = f_eq_common + rho*-0.333333333333333;
-            const double xi_28 = xi_25 + xi_27;
-            const double xi_29 = xi_23 + xi_27;
-            const double xi_30 = xi_24 + xi_27;
-            const double xi_2 = xi_24*2 + xi_26;
-            const double xi_3 = xi_23*2 + xi_26;
-            const double xi_4 = xi_25*2 + xi_26;
-            const double xi_6 = u0Mu1*2;
-            const double xi_7 = (u0Mu1*u0Mu1)*3 + xi_28;
-            const double xi_8 = u0Pu1*2;
-            const double xi_9 = (u0Pu1*u0Pu1)*3 + xi_28;
-            const double xi_10 = u1Pu2*2;
-            const double xi_11 = (u1Pu2*u1Pu2)*3 + xi_29;
-            const double xi_12 = u1Mu2*2;
-            const double xi_13 = (u1Mu2*u1Mu2)*3 + xi_29;
-            const double xi_14 = u0Mu2*2;
-            const double xi_15 = (u0Mu2*u0Mu2)*3 + xi_30;
-            const double xi_16 = u0Pu2*2;
-            const double xi_17 = (u0Pu2*u0Pu2)*3 + xi_30;
-            _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = Dummy_35 + omega*(-Dummy_35 + f_eq_common*0.333333333333333);
-            _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = Dummy_19 + xi_1*(Dummy_19*-6 + u_1 + xi_2);
-            _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = Dummy_27 + xi_1*(Dummy_27*-6 + xi_2 + xi_21);
-            _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = Dummy_33 + xi_1*(Dummy_33*-6 - u_0 + xi_3);
-            _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = Dummy_31 + xi_1*(Dummy_31*-6 + u_0 + xi_3);
-            _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = Dummy_32 + xi_1*(Dummy_32*-6 + u_2 + xi_4);
-            _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = Dummy_36 + xi_1*(Dummy_36*-6 + xi_22 + xi_4);
-            _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = Dummy_29 + xi_5*(Dummy_29*-24 - xi_6 + xi_7);
-            _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = Dummy_34 + xi_5*(Dummy_34*-24 + xi_8 + xi_9);
-            _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = Dummy_26 + xi_5*(Dummy_26*-24 - xi_8 + xi_9);
-            _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = Dummy_30 + xi_5*(Dummy_30*-24 + xi_6 + xi_7);
-            _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = Dummy_25 + xi_5*(Dummy_25*-24 + xi_10 + xi_11);
-            _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = Dummy_18 + xi_5*(Dummy_18*-24 - xi_12 + xi_13);
-            _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = Dummy_28 + xi_5*(Dummy_28*-24 - xi_14 + xi_15);
-            _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = Dummy_20 + xi_5*(Dummy_20*-24 + xi_16 + xi_17);
-            _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = Dummy_23 + xi_5*(Dummy_23*-24 + xi_12 + xi_13);
-            _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = Dummy_24 + xi_5*(Dummy_24*-24 - xi_10 + xi_11);
-            _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = Dummy_22 + xi_5*(Dummy_22*-24 - xi_16 + xi_17);
-            _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = Dummy_21 + xi_5*(Dummy_21*-24 + xi_14 + xi_15);
-         }
-      }
-   }
-}
-}
-namespace internal_kernel_stream {
-static FUNC_PREFIX void kernel_stream(double * const _data_pdfs, double * _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3)
-{
-   for (int ctr_2 = 1; ctr_2 < _size_pdfs_2 - 1; ctr_2 += 1)
-   {
-      double * _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
-      double * const _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
-      double * _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      double * _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
-      double * const _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      for (int ctr_1 = 1; ctr_1 < _size_pdfs_1 - 1; ctr_1 += 1)
-      {
-         double * _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
-         double * const _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
-         double * _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
-         double * const _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
-         double * _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
-         double * const _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
-         double * _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
-         double * const _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
-         double * _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
-         double * const _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
-         double * _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
-         double * const _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
-         double * _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
-         double * const _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
-         double * _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
-         double * const _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
-         double * _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
-         double * const _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
-         double * _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
-         double * const _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
-         double * _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
-         double * const _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
-         double * _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
-         double * const _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
-         double * _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
-         double * const _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
-         double * _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
-         double * const _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
-         double * _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
-         double * const _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
-         double * _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
-         double * const _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
-         double * _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
-         double * const _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
-         double * _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
-         double * const _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
-         double * _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
-         double * const _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
-         for (int ctr_0 = 1; ctr_0 < _size_pdfs_0 - 1; ctr_0 += 1)
-         {
-            _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-            _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
-            _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-            _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-         }
-      }
-   }
-}
-}
-
-
-const real_t UniformGridGPU_LatticeModel::w[19] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
-const real_t UniformGridGPU_LatticeModel::wInv[19] = { 3.00000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000 };
-
-void UniformGridGPU_LatticeModel::Sweep::streamCollide( IBlock * block, const uint_t numberOfGhostLayersToInclude )
-{
-    auto pdfs = block->getData< GhostLayerField<double, 19> >(pdfsID);
-    GhostLayerField<double, 19> * pdfs_tmp;
-    // Getting temporary field pdfs_tmp
-    auto it = cache_pdfs_.find( pdfs );
-    if( it != cache_pdfs_.end() )
-    {
-        pdfs_tmp = *it;
-    }
-    else 
-    {
-        pdfs_tmp = pdfs->cloneUninitialized();
-        cache_pdfs_.insert(pdfs_tmp);
-    }
-
-
-    auto & lm = dynamic_cast< lbm::PdfField<UniformGridGPU_LatticeModel> * > (pdfs)->latticeModel();
-    lm.configureBlock(block);
-
-    auto & omega = lm.omega;
-    WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    internal_kernel_streamCollide::kernel_streamCollide(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
-    pdfs->swapDataPointers(pdfs_tmp);
-
-}
-
-void UniformGridGPU_LatticeModel::Sweep::collide( IBlock * block, const uint_t numberOfGhostLayersToInclude )
-{
-   auto pdfs = block->getData< GhostLayerField<double, 19> >(pdfsID);
-
-
-    auto & lm = dynamic_cast< lbm::PdfField<UniformGridGPU_LatticeModel> * > (pdfs)->latticeModel();
-    lm.configureBlock(block);
-
-    auto & omega = lm.omega;
-    WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    internal_kernel_collide::kernel_collide(_data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
-}
-
-
-void UniformGridGPU_LatticeModel::Sweep::stream( IBlock * block, const uint_t numberOfGhostLayersToInclude )
-{
-    auto pdfs = block->getData< GhostLayerField<double, 19> >(pdfsID);
-    GhostLayerField<double, 19> * pdfs_tmp;
-    // Getting temporary field pdfs_tmp
-    auto it = cache_pdfs_.find( pdfs );
-    if( it != cache_pdfs_.end() )
-    {
-        pdfs_tmp = *it;
-    }
-    else 
-    {
-        pdfs_tmp = pdfs->cloneUninitialized();
-        cache_pdfs_.insert(pdfs_tmp);
-    }
-
-
-    WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
-    const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
-    const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
-    const int64_t _stride_pdfs_tmp_3 = int64_t(pdfs_tmp->fStride());
-    internal_kernel_stream::kernel_stream(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3);
-    
-    pdfs->swapDataPointers(pdfs_tmp);
-
-}
-
-
-} // namespace lbm
-} // namespace walberla
-
-
-
-
-// Buffer Packing
-
-namespace walberla {
-namespace mpi {
-
-mpi::SendBuffer & operator<< (mpi::SendBuffer & buf, const ::walberla::lbm::UniformGridGPU_LatticeModel & lm)
-{
-    buf << lm.currentLevel;
-    return buf;
-}
-
-mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf, ::walberla::lbm::UniformGridGPU_LatticeModel & lm)
-{
-    buf >> lm.currentLevel;
-    return buf;
-}
-
-
-} // namespace mpi
-} // namespace walberla
-
-#ifdef WALBERLA_CXX_COMPILER_IS_GNU
-#pragma GCC diagnostic pop
-#endif
-
-#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
-#pragma clang diagnostic pop
-#endif
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h
deleted file mode 100644
index 02a6c7cf869062c38e8ad513dc3ddf69bafb2158..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h
+++ /dev/null
@@ -1,746 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\author Martin Bauer <martin.bauer@fau.de>
-//
-//======================================================================================================================
-
-
-#include "core/DataTypes.h"
-#include "core/logging/Logging.h"
-
-#include "field/GhostLayerField.h"
-#include "field/SwapableCompare.h"
-#include "domain_decomposition/BlockDataID.h"
-#include "domain_decomposition/IBlock.h"
-#include "stencil/D3Q19.h"
-
-#include "lbm/lattice_model/EquilibriumDistribution.h"
-#include "lbm/field/Density.h"
-#include "lbm/field/DensityAndMomentumDensity.h"
-#include "lbm/field/DensityAndVelocity.h"
-#include "lbm/field/PressureTensor.h"
-#include "lbm/field/ShearRate.h"
-
-#include <set>
-
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
-
-#ifdef WALBERLA_CXX_COMPILER_IS_GNU
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-variable"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-#endif
-
-#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-variable"
-#pragma clang diagnostic ignored "-Wunused-parameter"
-#endif
-
-
-
-
-// Forward declarations
-namespace walberla{
-namespace lbm {
-   class UniformGridGPU_LatticeModel;
-}}
-namespace walberla {
-namespace mpi {
-    mpi::SendBuffer & operator<< (mpi::SendBuffer & buf, const ::walberla::lbm::UniformGridGPU_LatticeModel & lm);
-    mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf,       ::walberla::lbm::UniformGridGPU_LatticeModel & lm);
-}}
-
-
-
-
-namespace walberla {
-namespace lbm {
-
-
-/**
-UniformGridGPU_LatticeModel was generated with lbmpy. Do not edit this file directly. Instead modify UniformGridGPU_LatticeModel.py.
-For details see documentation of lbmpy.
-
-Usage:
-    - Create an instance of this lattice model class: the constructor parameters vary depending on the configure
-      lattice model. A model with constant force needs a single force vector, while a model with variable forces needs
-      a force field. All constructor parameters are ordered alphabetically.
-    - Create a PDFField with the lattice model as template argument to store the particle distribution functions.
-      Use the PDFField to get and modify macroscopic values.
-    - The internal class UniformGridGPU_LatticeModel::Sweep is a functor to execute one LB time step.
-      Stream, collide steps can be executed separately, or together in an optimized stream-pull-collide scheme
-
-*/
-class UniformGridGPU_LatticeModel
-{
-
-public:
-    typedef stencil::D3Q19 Stencil;
-    typedef stencil::D3Q19 CommunicationStencil;
-    static const real_t w[19];
-    static const real_t wInv[19];
-
-    static const bool compressible = false;
-    static const int equilibriumAccuracyOrder = 2;
-
-    class Sweep
-    {
-    public:
-        Sweep( BlockDataID _pdfsID ) : pdfsID(_pdfsID) {};
-
-        //void stream       ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) );
-        void collide      ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) );
-        void streamCollide( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) );
-        void stream       ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) );
-
-        void operator() ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) )
-        {
-            streamCollide( block, numberOfGhostLayersToInclude );
-        }
-
-    private:
-        BlockDataID pdfsID;
-
-        std::set< GhostLayerField<double, 19> *, field::SwapableCompare< GhostLayerField<double, 19> * > > cache_pdfs_;
-    };
-
-    UniformGridGPU_LatticeModel( double omega_ )
-        : omega(omega_), currentLevel(0)
-    {};
-
-    void configure( IBlock & block, StructuredBlockStorage &)  { configureBlock( &block ); }
-
-private:
-    void configureBlock(IBlock * block)
-    {
-        
-
-
-        }
-
-    // Parameters:
-    double omega;
-
-    // Updated by configureBlock:
-    
-
-    uint_t currentLevel;
-
-    // Backend classes can access private members:
-    friend class UniformGridGPU_LatticeModel::Sweep;
-    template<class LM, class Enable> friend class  EquilibriumDistribution;
-    template<class LM, class Enable> friend struct Equilibrium;
-    template<class LM, class Enable> friend struct internal::AdaptVelocityToForce;
-    template<class LM, class Enable> friend struct Density;
-    template<class LM>               friend struct DensityAndVelocity;
-    template<class LM, class Enable> friend struct DensityAndMomentumDensity;
-    template<class LM, class Enable> friend struct MomentumDensity;
-    template<class LM, class It, class Enable> friend struct DensityAndVelocityRange;
-
-    friend mpi::SendBuffer & ::walberla::mpi::operator<< (mpi::SendBuffer & , const UniformGridGPU_LatticeModel & );
-    friend mpi::RecvBuffer & ::walberla::mpi::operator>> (mpi::RecvBuffer & ,       UniformGridGPU_LatticeModel & );
-
-};
-
-
-
-
-//======================================================================================================================
-//
-//  Implementation of macroscopic value backend
-//
-//======================================================================================================================
-
-
-
-template<>
-class EquilibriumDistribution< UniformGridGPU_LatticeModel, void>
-{
-public:
-   typedef typename UniformGridGPU_LatticeModel::Stencil Stencil;
-
-   static real_t get( const stencil::Direction direction,
-                      const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ),
-                      real_t rho = real_t(1.0) )
-   {
-        
-        rho -= real_t(1.0);
-        
-        
-    using namespace stencil;
-    switch( direction ) {
-        case C: return rho*0.333333333333333 - 0.333333333333333*(u[0]*u[0]) - 0.333333333333333*(u[1]*u[1]) - 0.333333333333333*(u[2]*u[2]);
-        case N: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*u[1];
-        case S: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*u[1] - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]);
-        case W: return rho*0.0555555555555556 - 0.166666666666667*u[0] - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]);
-        case E: return rho*0.0555555555555556 - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]) + 0.166666666666667*u[0];
-        case T: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*u[2];
-        case B: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*u[2] + 0.166666666666667*(u[2]*u[2]);
-        case NW: return rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1];
-        case NE: return rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.25*u[0]*u[1];
-        case SW: return rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.0833333333333333*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.25*u[0]*u[1];
-        case SE: return rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[1]*u[1]);
-        case TN: return rho*0.0277777777777778 + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2] + 0.25*u[1]*u[2];
-        case TS: return rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2];
-        case TW: return rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2];
-        case TE: return rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2] + 0.25*u[0]*u[2];
-        case BN: return rho*0.0277777777777778 - 0.0833333333333333*u[2] - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.0833333333333333*(u[2]*u[2]);
-        case BS: return rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.0833333333333333*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[1]*u[2];
-        case BW: return rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.0833333333333333*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[0]*u[2];
-        case BE: return rho*0.0277777777777778 - 0.0833333333333333*u[2] - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[2]*u[2]);
-        default:
-            WALBERLA_ABORT("Invalid Direction");
-    }
-    
-   }
-
-   static real_t getSymmetricPart( const stencil::Direction direction,
-                                   const Vector3<real_t> & u = Vector3< real_t >(real_t(0.0)),
-                                   real_t rho = real_t(1.0) )
-   {
-        
-        rho -= real_t(1.0);
-        
-        
-    using namespace stencil;
-    switch( direction ) {
-        case C: return rho*0.333333333333333 - 0.333333333333333*(u[0]*u[0]) - 0.333333333333333*(u[1]*u[1]) - 0.333333333333333*(u[2]*u[2]);
-        case N: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]);
-        case S: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]);
-        case W: return rho*0.0555555555555556 - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]);
-        case E: return rho*0.0555555555555556 - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]);
-        case T: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*(u[2]*u[2]);
-        case B: return rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*(u[2]*u[2]);
-        case NW: return rho*0.0277777777777778 - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]);
-        case NE: return rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.25*u[0]*u[1];
-        case SW: return rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.25*u[0]*u[1];
-        case SE: return rho*0.0277777777777778 - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]);
-        case TN: return rho*0.0277777777777778 + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[1]*u[2];
-        case TS: return rho*0.0277777777777778 - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]);
-        case TW: return rho*0.0277777777777778 - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]);
-        case TE: return rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[0]*u[2];
-        case BN: return rho*0.0277777777777778 - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]);
-        case BS: return rho*0.0277777777777778 + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[1]*u[2];
-        case BW: return rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[0]*u[2];
-        case BE: return rho*0.0277777777777778 - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]);
-        default:
-            WALBERLA_ABORT("Invalid Direction");
-    }
-    
-   }
-
-   static real_t getAsymmetricPart( const stencil::Direction direction,
-                                    const Vector3< real_t > & u = Vector3<real_t>( real_t(0.0) ),
-                                    real_t rho = real_t(1.0) )
-   {
-        
-        rho -= real_t(1.0);
-        
-        
-    using namespace stencil;
-    switch( direction ) {
-        case C: return 0;
-        case N: return 0.166666666666667*u[1];
-        case S: return -0.166666666666667*u[1];
-        case W: return -0.166666666666667*u[0];
-        case E: return 0.166666666666667*u[0];
-        case T: return 0.166666666666667*u[2];
-        case B: return -0.166666666666667*u[2];
-        case NW: return -0.0833333333333333*u[0] + 0.0833333333333333*u[1];
-        case NE: return 0.0833333333333333*u[0] + 0.0833333333333333*u[1];
-        case SW: return -0.0833333333333333*u[0] - 0.0833333333333333*u[1];
-        case SE: return -0.0833333333333333*u[1] + 0.0833333333333333*u[0];
-        case TN: return 0.0833333333333333*u[1] + 0.0833333333333333*u[2];
-        case TS: return -0.0833333333333333*u[1] + 0.0833333333333333*u[2];
-        case TW: return -0.0833333333333333*u[0] + 0.0833333333333333*u[2];
-        case TE: return 0.0833333333333333*u[0] + 0.0833333333333333*u[2];
-        case BN: return -0.0833333333333333*u[2] + 0.0833333333333333*u[1];
-        case BS: return -0.0833333333333333*u[1] - 0.0833333333333333*u[2];
-        case BW: return -0.0833333333333333*u[0] - 0.0833333333333333*u[2];
-        case BE: return -0.0833333333333333*u[2] + 0.0833333333333333*u[0];
-        default:
-            WALBERLA_ABORT("Invalid Direction");
-    }
-    
-   }
-
-   static std::vector< real_t > get( const Vector3< real_t > & u = Vector3<real_t>( real_t(0.0) ),
-                                     real_t rho = real_t(1.0) )
-   {
-      
-      rho -= real_t(1.0);
-      
-
-      std::vector< real_t > equilibrium( Stencil::Size );
-      for( auto d = Stencil::begin(); d != Stencil::end(); ++d )
-      {
-         equilibrium[d.toIdx()] = get(*d, u, rho);
-      }
-      return equilibrium;
-   }
-};
-
-
-namespace internal {
-
-template<>
-struct AdaptVelocityToForce<UniformGridGPU_LatticeModel, void>
-{
-   template< typename FieldPtrOrIterator >
-   static Vector3<real_t> get( FieldPtrOrIterator & it, const UniformGridGPU_LatticeModel & lm,
-                               const Vector3< real_t > & velocity, const real_t rho )
-   {
-      auto x = it.x();
-      auto y = it.y();
-      auto z = it.z();
-      
-      return velocity;
-      
-   }
-
-   static Vector3<real_t> get( const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const UniformGridGPU_LatticeModel & lm,
-                               const Vector3< real_t > & velocity, const real_t rho )
-   {
-      
-      return velocity;
-      
-   }
-};
-} // namespace internal
-
-
-
-template<>
-struct Equilibrium< UniformGridGPU_LatticeModel, void >
-{
-
-   template< typename FieldPtrOrIterator >
-   static void set( FieldPtrOrIterator & it,
-                    const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) )
-   {
-        
-        rho -= real_t(1.0);
-        
-
-       it[0] = rho*0.333333333333333 - 0.333333333333333*(u[0]*u[0]) - 0.333333333333333*(u[1]*u[1]) - 0.333333333333333*(u[2]*u[2]);
-       it[1] = rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*u[1];
-       it[2] = rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*u[1] - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]);
-       it[3] = rho*0.0555555555555556 - 0.166666666666667*u[0] - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]);
-       it[4] = rho*0.0555555555555556 - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]) + 0.166666666666667*u[0];
-       it[5] = rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*u[2];
-       it[6] = rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*u[2] + 0.166666666666667*(u[2]*u[2]);
-       it[7] = rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1];
-       it[8] = rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.25*u[0]*u[1];
-       it[9] = rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.0833333333333333*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.25*u[0]*u[1];
-       it[10] = rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[1]*u[1]);
-       it[11] = rho*0.0277777777777778 + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2] + 0.25*u[1]*u[2];
-       it[12] = rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2];
-       it[13] = rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2];
-       it[14] = rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2] + 0.25*u[0]*u[2];
-       it[15] = rho*0.0277777777777778 - 0.0833333333333333*u[2] - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.0833333333333333*(u[2]*u[2]);
-       it[16] = rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.0833333333333333*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[1]*u[2];
-       it[17] = rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.0833333333333333*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[0]*u[2];
-       it[18] = rho*0.0277777777777778 - 0.0833333333333333*u[2] - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[2]*u[2]);
-       }
-
-   template< typename PdfField_T >
-   static void set( PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z,
-                    const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) )
-   {
-      
-      rho -= real_t(1.0);
-      
-
-      real_t & xyz0 = pdf(x,y,z,0);
-      pdf.getF( &xyz0, 0)= rho*0.333333333333333 - 0.333333333333333*(u[0]*u[0]) - 0.333333333333333*(u[1]*u[1]) - 0.333333333333333*(u[2]*u[2]);
-      pdf.getF( &xyz0, 1)= rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*u[1];
-      pdf.getF( &xyz0, 2)= rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*u[1] - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[1]*u[1]);
-      pdf.getF( &xyz0, 3)= rho*0.0555555555555556 - 0.166666666666667*u[0] - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]);
-      pdf.getF( &xyz0, 4)= rho*0.0555555555555556 - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*(u[0]*u[0]) + 0.166666666666667*u[0];
-      pdf.getF( &xyz0, 5)= rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) + 0.166666666666667*(u[2]*u[2]) + 0.166666666666667*u[2];
-      pdf.getF( &xyz0, 6)= rho*0.0555555555555556 - 0.166666666666667*(u[0]*u[0]) - 0.166666666666667*(u[1]*u[1]) - 0.166666666666667*u[2] + 0.166666666666667*(u[2]*u[2]);
-      pdf.getF( &xyz0, 7)= rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1];
-      pdf.getF( &xyz0, 8)= rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.25*u[0]*u[1];
-      pdf.getF( &xyz0, 9)= rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.0833333333333333*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[1]*u[1]) + 0.25*u[0]*u[1];
-      pdf.getF( &xyz0, 10)= rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.25*u[0]*u[1] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[1]*u[1]);
-      pdf.getF( &xyz0, 11)= rho*0.0277777777777778 + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2] + 0.25*u[1]*u[2];
-      pdf.getF( &xyz0, 12)= rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2];
-      pdf.getF( &xyz0, 13)= rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2];
-      pdf.getF( &xyz0, 14)= rho*0.0277777777777778 + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[2]*u[2]) + 0.0833333333333333*u[2] + 0.25*u[0]*u[2];
-      pdf.getF( &xyz0, 15)= rho*0.0277777777777778 - 0.0833333333333333*u[2] - 0.25*u[1]*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*u[1] + 0.0833333333333333*(u[2]*u[2]);
-      pdf.getF( &xyz0, 16)= rho*0.0277777777777778 - 0.0833333333333333*u[1] - 0.0833333333333333*u[2] + 0.0833333333333333*(u[1]*u[1]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[1]*u[2];
-      pdf.getF( &xyz0, 17)= rho*0.0277777777777778 - 0.0833333333333333*u[0] - 0.0833333333333333*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*(u[2]*u[2]) + 0.25*u[0]*u[2];
-      pdf.getF( &xyz0, 18)= rho*0.0277777777777778 - 0.0833333333333333*u[2] - 0.25*u[0]*u[2] + 0.0833333333333333*(u[0]*u[0]) + 0.0833333333333333*u[0] + 0.0833333333333333*(u[2]*u[2]);
-      }
-};
-
-
-template<>
-struct Density<UniformGridGPU_LatticeModel, void>
-{
-   template< typename FieldPtrOrIterator >
-   static inline real_t get( const UniformGridGPU_LatticeModel & , const FieldPtrOrIterator & it )
-   {
-        const real_t f_0 = it[0];
-        const real_t f_1 = it[1];
-        const real_t f_2 = it[2];
-        const real_t f_3 = it[3];
-        const real_t f_4 = it[4];
-        const real_t f_5 = it[5];
-        const real_t f_6 = it[6];
-        const real_t f_7 = it[7];
-        const real_t f_8 = it[8];
-        const real_t f_9 = it[9];
-        const real_t f_10 = it[10];
-        const real_t f_11 = it[11];
-        const real_t f_12 = it[12];
-        const real_t f_13 = it[13];
-        const real_t f_14 = it[14];
-        const real_t f_15 = it[15];
-        const real_t f_16 = it[16];
-        const real_t f_17 = it[17];
-        const real_t f_18 = it[18];
-        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
-        const double vel1Term = f_1 + f_11 + f_15 + f_7;
-        const double vel2Term = f_12 + f_13 + f_5;
-        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term + 1;
-        return rho;
-   }
-
-   template< typename PdfField_T >
-   static inline real_t get( const UniformGridGPU_LatticeModel & ,
-                             const PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
-   {
-        const real_t & xyz0 = pdf(x,y,z,0);
-        const real_t f_0 = pdf.getF( &xyz0, 0);
-        const real_t f_1 = pdf.getF( &xyz0, 1);
-        const real_t f_2 = pdf.getF( &xyz0, 2);
-        const real_t f_3 = pdf.getF( &xyz0, 3);
-        const real_t f_4 = pdf.getF( &xyz0, 4);
-        const real_t f_5 = pdf.getF( &xyz0, 5);
-        const real_t f_6 = pdf.getF( &xyz0, 6);
-        const real_t f_7 = pdf.getF( &xyz0, 7);
-        const real_t f_8 = pdf.getF( &xyz0, 8);
-        const real_t f_9 = pdf.getF( &xyz0, 9);
-        const real_t f_10 = pdf.getF( &xyz0, 10);
-        const real_t f_11 = pdf.getF( &xyz0, 11);
-        const real_t f_12 = pdf.getF( &xyz0, 12);
-        const real_t f_13 = pdf.getF( &xyz0, 13);
-        const real_t f_14 = pdf.getF( &xyz0, 14);
-        const real_t f_15 = pdf.getF( &xyz0, 15);
-        const real_t f_16 = pdf.getF( &xyz0, 16);
-        const real_t f_17 = pdf.getF( &xyz0, 17);
-        const real_t f_18 = pdf.getF( &xyz0, 18);
-        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
-        const double vel1Term = f_1 + f_11 + f_15 + f_7;
-        const double vel2Term = f_12 + f_13 + f_5;
-        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term + 1;
-        return rho;
-   }
-};
-
-
-template<>
-struct DensityAndVelocity<UniformGridGPU_LatticeModel>
-{
-    template< typename FieldPtrOrIterator >
-    static void set( FieldPtrOrIterator & it, const UniformGridGPU_LatticeModel & lm,
-                     const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) )
-    {
-        auto x = it.x();
-        auto y = it.y();
-        auto z = it.z();
-
-        const double rho = rho_in - 1;
-        const double u_0 = u[0];
-        const double u_1 = u[1];
-        const double u_2 = u[2];
-        
-
-        Equilibrium<UniformGridGPU_LatticeModel>::set(it, Vector3<real_t>(u_0, u_1, u_2), rho + real_t(1) );
-    }
-
-    template< typename PdfField_T >
-    static void set( PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const UniformGridGPU_LatticeModel & lm,
-                     const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) )
-    {
-        const double rho = rho_in - 1;
-        const double u_0 = u[0];
-        const double u_1 = u[1];
-        const double u_2 = u[2];
-        
-
-        Equilibrium<UniformGridGPU_LatticeModel>::set(pdf, x, y, z, Vector3<real_t>(u_0, u_1, u_2), rho  + real_t(1) );
-    }
-};
-
-
-template<typename FieldIteratorXYZ >
-struct DensityAndVelocityRange<UniformGridGPU_LatticeModel, FieldIteratorXYZ>
-{
-
-   static void set( FieldIteratorXYZ & begin, const FieldIteratorXYZ & end, const UniformGridGPU_LatticeModel & lm,
-                    const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) )
-   {
-        for( auto cellIt = begin; cellIt != end; ++cellIt )
-        {
-            const auto x = cellIt.x();
-            const auto y = cellIt.y();
-            const auto z = cellIt.z();
-            const double rho = rho_in - 1;
-            const double u_0 = u[0];
-            const double u_1 = u[1];
-            const double u_2 = u[2];
-            
-
-            Equilibrium<UniformGridGPU_LatticeModel>::set(cellIt, Vector3<real_t>(u_0, u_1, u_2), rho + real_t(1) );
-        }
-   }
-};
-
-
-
-template<>
-struct DensityAndMomentumDensity<UniformGridGPU_LatticeModel>
-{
-   template< typename FieldPtrOrIterator >
-   static real_t get( Vector3< real_t > & momentumDensity, const UniformGridGPU_LatticeModel & lm,
-                      const FieldPtrOrIterator & it )
-   {
-        const auto x = it.x();
-        const auto y = it.y();
-        const auto z = it.z();
-
-        const real_t f_0 = it[0];
-        const real_t f_1 = it[1];
-        const real_t f_2 = it[2];
-        const real_t f_3 = it[3];
-        const real_t f_4 = it[4];
-        const real_t f_5 = it[5];
-        const real_t f_6 = it[6];
-        const real_t f_7 = it[7];
-        const real_t f_8 = it[8];
-        const real_t f_9 = it[9];
-        const real_t f_10 = it[10];
-        const real_t f_11 = it[11];
-        const real_t f_12 = it[12];
-        const real_t f_13 = it[13];
-        const real_t f_14 = it[14];
-        const real_t f_15 = it[15];
-        const real_t f_16 = it[16];
-        const real_t f_17 = it[17];
-        const real_t f_18 = it[18];
-        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
-        const double vel1Term = f_1 + f_11 + f_15 + f_7;
-        const double vel2Term = f_12 + f_13 + f_5;
-        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term + 1;
-        const double md_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
-        const double md_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
-        const double md_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
-        momentumDensity[0] = md_0;
-        momentumDensity[1] = md_1;
-        momentumDensity[2] = md_2;
-        
-        return rho;
-   }
-
-   template< typename PdfField_T >
-   static real_t get( Vector3< real_t > & momentumDensity, const UniformGridGPU_LatticeModel & lm, const PdfField_T & pdf,
-                      const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
-   {
-        const real_t & xyz0 = pdf(x,y,z,0);
-        const real_t f_0 = pdf.getF( &xyz0, 0);
-        const real_t f_1 = pdf.getF( &xyz0, 1);
-        const real_t f_2 = pdf.getF( &xyz0, 2);
-        const real_t f_3 = pdf.getF( &xyz0, 3);
-        const real_t f_4 = pdf.getF( &xyz0, 4);
-        const real_t f_5 = pdf.getF( &xyz0, 5);
-        const real_t f_6 = pdf.getF( &xyz0, 6);
-        const real_t f_7 = pdf.getF( &xyz0, 7);
-        const real_t f_8 = pdf.getF( &xyz0, 8);
-        const real_t f_9 = pdf.getF( &xyz0, 9);
-        const real_t f_10 = pdf.getF( &xyz0, 10);
-        const real_t f_11 = pdf.getF( &xyz0, 11);
-        const real_t f_12 = pdf.getF( &xyz0, 12);
-        const real_t f_13 = pdf.getF( &xyz0, 13);
-        const real_t f_14 = pdf.getF( &xyz0, 14);
-        const real_t f_15 = pdf.getF( &xyz0, 15);
-        const real_t f_16 = pdf.getF( &xyz0, 16);
-        const real_t f_17 = pdf.getF( &xyz0, 17);
-        const real_t f_18 = pdf.getF( &xyz0, 18);
-        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
-        const double vel1Term = f_1 + f_11 + f_15 + f_7;
-        const double vel2Term = f_12 + f_13 + f_5;
-        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term + 1;
-        const double md_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
-        const double md_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
-        const double md_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
-        momentumDensity[0] = md_0;
-        momentumDensity[1] = md_1;
-        momentumDensity[2] = md_2;
-        
-       return rho;
-   }
-};
-
-
-template<>
-struct MomentumDensity< UniformGridGPU_LatticeModel>
-{
-   template< typename FieldPtrOrIterator >
-   static void get( Vector3< real_t > & momentumDensity, const UniformGridGPU_LatticeModel & lm, const FieldPtrOrIterator & it )
-   {
-        const auto x = it.x();
-        const auto y = it.y();
-        const auto z = it.z();
-
-        const real_t f_0 = it[0];
-        const real_t f_1 = it[1];
-        const real_t f_2 = it[2];
-        const real_t f_3 = it[3];
-        const real_t f_4 = it[4];
-        const real_t f_5 = it[5];
-        const real_t f_6 = it[6];
-        const real_t f_7 = it[7];
-        const real_t f_8 = it[8];
-        const real_t f_9 = it[9];
-        const real_t f_10 = it[10];
-        const real_t f_11 = it[11];
-        const real_t f_12 = it[12];
-        const real_t f_13 = it[13];
-        const real_t f_14 = it[14];
-        const real_t f_15 = it[15];
-        const real_t f_16 = it[16];
-        const real_t f_17 = it[17];
-        const real_t f_18 = it[18];
-        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
-        const double vel1Term = f_1 + f_11 + f_15 + f_7;
-        const double vel2Term = f_12 + f_13 + f_5;
-        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term + 1;
-        const double md_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
-        const double md_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
-        const double md_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
-        momentumDensity[0] = md_0;
-        momentumDensity[1] = md_1;
-        momentumDensity[2] = md_2;
-        
-   }
-
-   template< typename PdfField_T >
-   static void get( Vector3< real_t > & momentumDensity, const UniformGridGPU_LatticeModel & lm, const PdfField_T & pdf,
-                    const cell_idx_t x, const cell_idx_t y, const cell_idx_t z )
-   {
-        const real_t & xyz0 = pdf(x,y,z,0);
-        const real_t f_0 = pdf.getF( &xyz0, 0);
-        const real_t f_1 = pdf.getF( &xyz0, 1);
-        const real_t f_2 = pdf.getF( &xyz0, 2);
-        const real_t f_3 = pdf.getF( &xyz0, 3);
-        const real_t f_4 = pdf.getF( &xyz0, 4);
-        const real_t f_5 = pdf.getF( &xyz0, 5);
-        const real_t f_6 = pdf.getF( &xyz0, 6);
-        const real_t f_7 = pdf.getF( &xyz0, 7);
-        const real_t f_8 = pdf.getF( &xyz0, 8);
-        const real_t f_9 = pdf.getF( &xyz0, 9);
-        const real_t f_10 = pdf.getF( &xyz0, 10);
-        const real_t f_11 = pdf.getF( &xyz0, 11);
-        const real_t f_12 = pdf.getF( &xyz0, 12);
-        const real_t f_13 = pdf.getF( &xyz0, 13);
-        const real_t f_14 = pdf.getF( &xyz0, 14);
-        const real_t f_15 = pdf.getF( &xyz0, 15);
-        const real_t f_16 = pdf.getF( &xyz0, 16);
-        const real_t f_17 = pdf.getF( &xyz0, 17);
-        const real_t f_18 = pdf.getF( &xyz0, 18);
-        const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
-        const double vel1Term = f_1 + f_11 + f_15 + f_7;
-        const double vel2Term = f_12 + f_13 + f_5;
-        const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term + 1;
-        const double md_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
-        const double md_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
-        const double md_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
-        momentumDensity[0] = md_0;
-        momentumDensity[1] = md_1;
-        momentumDensity[2] = md_2;
-        
-   }
-};
-
-
-template<>
-struct PressureTensor<UniformGridGPU_LatticeModel>
-{
-   template< typename FieldPtrOrIterator >
-   static void get( Matrix3< real_t > & /* pressureTensor */, const UniformGridGPU_LatticeModel & /* latticeModel */, const FieldPtrOrIterator & /* it */ )
-   {
-       WALBERLA_ABORT("Not implemented");
-   }
-
-   template< typename PdfField_T >
-   static void get( Matrix3< real_t > & /* pressureTensor */, const UniformGridGPU_LatticeModel & /* latticeModel */, const PdfField_T & /* pdf */,
-                    const cell_idx_t /* x */, const cell_idx_t /* y */, const cell_idx_t /* z */ )
-   {
-       WALBERLA_ABORT("Not implemented");
-   }
-};
-
-
-template<>
-struct ShearRate<UniformGridGPU_LatticeModel>
-{
-   template< typename FieldPtrOrIterator >
-   static inline real_t get( const UniformGridGPU_LatticeModel & /* latticeModel */, const FieldPtrOrIterator & /* it */,
-                             const Vector3< real_t > & /* velocity */, const real_t /* rho */)
-   {
-       WALBERLA_ABORT("Not implemented");
-       return real_t(0.0);
-   }
-
-   template< typename PdfField_T >
-   static inline real_t get( const UniformGridGPU_LatticeModel & latticeModel,
-                             const PdfField_T & /* pdf */, const cell_idx_t /* x */, const cell_idx_t /* y */, const cell_idx_t /* z */,
-                             const Vector3< real_t > & /* velocity */, const real_t /* rho */ )
-   {
-       WALBERLA_ABORT("Not implemented");
-       return real_t(0.0);
-   }
-
-   static inline real_t get( const std::vector< real_t > & /* nonEquilibrium */, const real_t /* relaxationParam */,
-                             const real_t /* rho */ = real_t(1) )
-   {
-       WALBERLA_ABORT("Not implemented");
-       return real_t(0.0);
-   }
-};
-
-
-} // namespace lbm
-} // namespace walberla
-
-
-
-#ifdef WALBERLA_CXX_COMPILER_IS_GNU
-#pragma GCC diagnostic pop
-#endif
-
-#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
-#pragma clang diagnostic pop
-#endif
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
deleted file mode 100644
index a650f8b3edae3b607ec4bb8f13329cab33e862ef..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
+++ /dev/null
@@ -1,324 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\file .cpp
-//! \\ingroup lbm
-//! \\author lbmpy
-//======================================================================================================================
-
-#include <cmath>
-
-#include "core/DataTypes.h"
-#include "core/Macros.h"
-#include "UniformGridGPU_LbKernel.h"
-
-
-#define FUNC_PREFIX __global__
-
-#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
-#   pragma GCC diagnostic push
-#   pragma GCC diagnostic ignored "-Wfloat-equal"
-#   pragma GCC diagnostic ignored "-Wshadow"
-#   pragma GCC diagnostic ignored "-Wconversion"
-#endif
-
-using namespace std;
-
-namespace walberla {
-namespace pystencils {
-
-namespace internal_UniformGridGPU_LbKernel {
-static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, double * _data_pdfs_tmp, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double omega)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x + 1 < _size_pdfs_0 - 1 && blockDim.y*blockIdx.y + threadIdx.y + 1 < _size_pdfs_1 - 1 && blockDim.z*blockIdx.z + threadIdx.z + 1 < _size_pdfs_2 - 1)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x + 1;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y + 1;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z + 1;
-      double * const _data_pdfs_10_21_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
-      const double xi_18 = -_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * const _data_pdfs_11_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      const double xi_19 = -_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * const _data_pdfs_11_21_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
-      const double xi_20 = -_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_2m1_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
-      double * const _data_pdfs_11_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      double * const _data_pdfs_1m1_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      double * const _data_pdfs_10_21_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
-      double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      const double vel0Term = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-      double * const _data_pdfs_1m1_2m1_311 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
-      double * const _data_pdfs_1m1_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      double * const _data_pdfs_1m1_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      double * const _data_pdfs_1m1_21_315 = _data_pdfs + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
-      const double vel1Term = _data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_2m1_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
-      double * const _data_pdfs_11_2m1_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
-      double * const _data_pdfs_10_2m1_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
-      const double vel2Term = _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0] + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
-      double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      double * const _data_pdfs_11_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      double * const _data_pdfs_10_21_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
-      const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
-      const double u_0 = vel0Term + xi_18 + xi_19 - _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      const double xi_23 = (u_0*u_0);
-      const double u_1 = vel1Term + xi_19 + xi_20 - _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-      const double xi_21 = -u_1;
-      const double xi_24 = (u_1*u_1);
-      const double u_2 = vel2Term + xi_18 + xi_20 - _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
-      const double xi_22 = -u_2;
-      const double xi_25 = (u_2*u_2);
-      const double u0Mu1 = u_0 + xi_21;
-      const double u0Pu1 = u_0 + u_1;
-      const double u1Pu2 = u_1 + u_2;
-      const double u1Mu2 = u_1 + xi_22;
-      const double u0Mu2 = u_0 + xi_22;
-      const double u0Pu2 = u_0 + u_2;
-      const double f_eq_common = rho - xi_23 - xi_24 - xi_25;
-      const double xi_26 = f_eq_common + rho*-0.666666666666667;
-      const double xi_27 = f_eq_common + rho*-0.333333333333333;
-      const double xi_28 = xi_25 + xi_27;
-      const double xi_29 = xi_23 + xi_27;
-      const double xi_30 = xi_24 + xi_27;
-      const double xi_2 = xi_24*2 + xi_26;
-      const double xi_3 = xi_23*2 + xi_26;
-      const double xi_4 = xi_25*2 + xi_26;
-      const double xi_6 = u0Mu1*2;
-      const double xi_7 = (u0Mu1*u0Mu1)*3 + xi_28;
-      const double xi_8 = u0Pu1*2;
-      const double xi_9 = (u0Pu1*u0Pu1)*3 + xi_28;
-      const double xi_10 = u1Pu2*2;
-      const double xi_11 = (u1Pu2*u1Pu2)*3 + xi_29;
-      const double xi_12 = u1Mu2*2;
-      const double xi_13 = (u1Mu2*u1Mu2)*3 + xi_29;
-      const double xi_14 = u0Mu2*2;
-      const double xi_15 = (u0Mu2*u0Mu2)*3 + xi_30;
-      const double xi_16 = u0Pu2*2;
-      const double xi_17 = (u0Pu2*u0Pu2)*3 + xi_30;
-      const double xi_1 = omega*0.166666666666667;
-      const double xi_5 = omega*0.0416666666666667;
-      double * _data_pdfs_tmp_10_20_30 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
-      _data_pdfs_tmp_10_20_30[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.333333333333333 - _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0]) + _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_31 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      _data_pdfs_tmp_10_20_31[_stride_pdfs_0*ctr_0] = xi_1*(u_1 + xi_2 - 6*_data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0]) + _data_pdfs_1m1_20_31[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_32 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_32[_stride_pdfs_0*ctr_0] = xi_1*(xi_2 + xi_21 - 6*_data_pdfs_11_20_32[_stride_pdfs_0*ctr_0]) + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_33 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_33[_stride_pdfs_0*ctr_0] = xi_1*(-u_0 + xi_3 - 6*_data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_34 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_34[_stride_pdfs_0*ctr_0] = xi_1*(u_0 + xi_3 - 6*_data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_35 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_35[_stride_pdfs_0*ctr_0] = xi_1*(u_2 + xi_4 - 6*_data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0]) + _data_pdfs_10_2m1_35[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_36 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_36[_stride_pdfs_0*ctr_0] = xi_1*(xi_22 + xi_4 - 6*_data_pdfs_10_21_36[_stride_pdfs_0*ctr_0]) + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_37 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_37[_stride_pdfs_0*ctr_0] = xi_5*(-xi_6 + xi_7 - 24*_data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_38 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_38[_stride_pdfs_0*ctr_0] = xi_5*(xi_8 + xi_9 - 24*_data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_39 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_39[_stride_pdfs_0*ctr_0] = xi_5*(-xi_8 + xi_9 - 24*_data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_310 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_310[_stride_pdfs_0*ctr_0] = xi_5*(xi_6 + xi_7 - 24*_data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_311 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_311[_stride_pdfs_0*ctr_0] = xi_5*(xi_10 + xi_11 - 24*_data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0]) + _data_pdfs_1m1_2m1_311[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_312 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_312[_stride_pdfs_0*ctr_0] = xi_5*(-xi_12 + xi_13 - 24*_data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0]) + _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_313 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_313[_stride_pdfs_0*ctr_0] = xi_5*(-xi_14 + xi_15 - 24*_data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_314 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_314[_stride_pdfs_0*ctr_0] = xi_5*(xi_16 + xi_17 - 24*_data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_10_2m1_314[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_315 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_315[_stride_pdfs_0*ctr_0] = xi_5*(xi_12 + xi_13 - 24*_data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0]) + _data_pdfs_1m1_21_315[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_316 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_316[_stride_pdfs_0*ctr_0] = xi_5*(-xi_10 + xi_11 - 24*_data_pdfs_11_21_316[_stride_pdfs_0*ctr_0]) + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
-      double * _data_pdfs_tmp_10_20_317 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_317[_stride_pdfs_0*ctr_0] = xi_5*(-xi_16 + xi_17 - 24*_data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]) + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
-      double * _data_pdfs_tmp_10_20_318 = _data_pdfs_tmp + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_pdfs_tmp_10_20_318[_stride_pdfs_0*ctr_0] = xi_5*(xi_14 + xi_15 - 24*_data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]) + _data_pdfs_10_21_318[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
-   } 
-}
-}
-
-void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream )
-{
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-    cuda::GPUField<double> * pdfs_tmp;
-    // Getting temporary field pdfs_tmp
-    auto it = cache_pdfs_.find( pdfs );
-    if( it != cache_pdfs_.end() )
-    {
-        pdfs_tmp = *it;
-    }
-    else 
-    {
-        pdfs_tmp = pdfs->cloneUninitialized();
-        cache_pdfs_.insert(pdfs_tmp);
-    }
-
-    WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(-1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-1, 0, 0, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2));
-    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2));
-    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2));
-    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    dim3 _block(int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)), int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)), int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)));
-    dim3 _grid(int(( (_size_pdfs_0 - 2) % (((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % (((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % (((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
-    internal_UniformGridGPU_LbKernel::UniformGridGPU_LbKernel<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
-    pdfs->swapDataPointers(pdfs_tmp);
-
-}
-
-
-
-void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
-{
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-    cuda::GPUField<double> * pdfs_tmp;
-    // Getting temporary field pdfs_tmp
-    auto it = cache_pdfs_.find( pdfs );
-    if( it != cache_pdfs_.end() )
-    {
-        pdfs_tmp = *it;
-    }
-    else 
-    {
-        pdfs_tmp = pdfs->cloneUninitialized();
-        cache_pdfs_.insert(pdfs_tmp);
-    }
-
-
-    CellInterval inner = pdfs->xyzSize();
-    inner.expand(-1);
-
-    WALBERLA_ASSERT_GREATER_EQUAL(inner.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(inner.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(inner.xMin() - 1, inner.yMin() - 1, inner.zMin() - 1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(inner.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(inner.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(inner.xMin() - 1, inner.yMin() - 1, inner.zMin() - 1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(inner.xSize()) + 2));
-    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(inner.xSize()) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(inner.ySize()) + 2));
-    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(inner.ySize()) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(inner.zSize()) + 2));
-    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(inner.zSize()) + 2);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    dim3 _block(int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)), int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)), int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)));
-    dim3 _grid(int(( (_size_pdfs_0 - 2) % (((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % (((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % (((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
-    internal_UniformGridGPU_LbKernel::UniformGridGPU_LbKernel<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
-}
-
-
-void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream  )
-{
-    static std::vector<CellInterval> layers;
-
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-    cuda::GPUField<double> * pdfs_tmp;
-    // Getting temporary field pdfs_tmp
-    auto it = cache_pdfs_.find( pdfs );
-    if( it != cache_pdfs_.end() )
-    {
-        pdfs_tmp = *it;
-    }
-    else 
-    {
-        pdfs_tmp = pdfs->cloneUninitialized();
-        cache_pdfs_.insert(pdfs_tmp);
-    }
-
-
-    if( layers.size() == 0 )
-    {
-        CellInterval ci;
-
-        pdfs->getSliceBeforeGhostLayer(stencil::T, ci, 1, false);
-        layers.push_back(ci);
-        pdfs->getSliceBeforeGhostLayer(stencil::B, ci, 1, false);
-        layers.push_back(ci);
-
-        pdfs->getSliceBeforeGhostLayer(stencil::N, ci, 1, false);
-        ci.expand(Cell(0, 0, -1));
-        layers.push_back(ci);
-        pdfs->getSliceBeforeGhostLayer(stencil::S, ci, 1, false);
-        ci.expand(Cell(0, 0, -1));
-        layers.push_back(ci);
-
-        pdfs->getSliceBeforeGhostLayer(stencil::E, ci, 1, false);
-        ci.expand(Cell(0, -1, -1));
-        layers.push_back(ci);
-        pdfs->getSliceBeforeGhostLayer(stencil::W, ci, 1, false);
-        ci.expand(Cell(0, -1, -1));
-        layers.push_back(ci);
-    }
-
-    
-    {
-        auto parallelSection_ = parallelStreams_.parallelSection( stream );
-        for( auto & ci: layers )
-        {
-            parallelSection_.run([&]( auto s ) {
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-                double * const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-                double * _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
-                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
-                const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
-                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
-                const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
-                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
-                const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
-                const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-                const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-                const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-                const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-                dim3 _block(int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)), int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)), int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)));
-                dim3 _grid(int(( (_size_pdfs_0 - 2) % (((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % (((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % (((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
-                internal_UniformGridGPU_LbKernel::UniformGridGPU_LbKernel<<<_grid, _block, 0, s>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
-            });
-        }
-    }
-    
-
-    pdfs->swapDataPointers(pdfs_tmp);
-
-}
-
-
-} // namespace pystencils
-} // namespace walberla
-
-
-#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
-#   pragma GCC diagnostic pop
-#endif
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.h
deleted file mode 100644
index def06eb548c5ad007b9408dd56e178ce9609a5b0..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.h
+++ /dev/null
@@ -1,91 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\file UniformGridGPU_LbKernel.h
-//! \\author pystencils
-//======================================================================================================================
-
-#include "core/DataTypes.h"
-
-#include "cuda/GPUField.h"
-#include "cuda/ParallelStreams.h"
-#include "field/SwapableCompare.h"
-#include "domain_decomposition/BlockDataID.h"
-#include "domain_decomposition/IBlock.h"
-
-#include <set>
-
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
-
-#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
-#   pragma GCC diagnostic push
-#   pragma GCC diagnostic ignored "-Wunused-parameter"
-#endif
-
-namespace walberla {
-namespace pystencils {
-
-
-class UniformGridGPU_LbKernel
-{
-public:
-    UniformGridGPU_LbKernel( BlockDataID pdfsID_, double omega_)
-        : pdfsID(pdfsID_), omega(omega_)
-    {};
-
-    
-    ~UniformGridGPU_LbKernel() {  
-        for(auto p: cache_pdfs_) {
-            delete p;
-        }
-     }
-
-
-
-    void operator() ( IBlock * block , cudaStream_t stream = 0 );
-
-    void inner( IBlock * block , cudaStream_t stream = 0 );
-    void outer( IBlock * block , cudaStream_t stream = 0 );
-
-    void setOuterPriority(int priority ) {
-        
-        parallelStreams_.setStreamPriority(priority);
-        
-    }
-private:
-    BlockDataID pdfsID;
-    double omega;
-
-    std::set< cuda::GPUField<double> *, field::SwapableCompare< cuda::GPUField<double> * > > cache_pdfs_;
-
-    
-    cuda::ParallelStreams parallelStreams_;
-    
-};
-
-
-} // namespace pystencils
-} // namespace walberla
-
-
-#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
-#   pragma GCC diagnostic pop
-#endif
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
deleted file mode 100644
index acabe1d0c9a5fa59aa15a9f32b15a41b25190a27..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
+++ /dev/null
@@ -1,132 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\file UniformGridGPU_NoSlip.cpp
-//! \\ingroup lbm
-//! \\author lbmpy
-//======================================================================================================================
-
-#include <cmath>
-
-#include "core/DataTypes.h"
-#include "core/Macros.h"
-#include "UniformGridGPU_NoSlip.h"
-#include "cuda/ErrorChecking.h"
-
-
-#define FUNC_PREFIX __global__
-
-using namespace std;
-
-namespace walberla {
-namespace lbm {
-
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wstrict-aliasing"
-#pragma GCC diagnostic ignored "-Wunused-variable"
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-
-#ifdef __CUDACC__
-#pragma push
-#pragma diag_suppress = declared_but_not_referenced
-#endif
-
-
-namespace internal_boundary_UniformGridGPU_NoSlip {
-static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
-   {
-      uint8_t * const _data_indexVector_10 = _data_indexVector;
-      const int32_t x = *((int32_t *)(& _data_indexVector_10[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      uint8_t * const _data_indexVector_14 = _data_indexVector + 4;
-      const int32_t y = *((int32_t *)(& _data_indexVector_14[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      uint8_t * const _data_indexVector_18 = _data_indexVector + 8;
-      const int32_t z = *((int32_t *)(& _data_indexVector_18[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      
-      
-      const int64_t cx [] = { 0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1 };
-      const int64_t cy [] = { 0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0 };
-      const int64_t cz [] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 };
-      const int invdir [] = { 0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13 };
-      
-      
-      const double weights [] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
-      
-      uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
-      const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
-      double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
-      _data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x];
-   } 
-}
-}
-
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-
-#ifdef __CUDACC__
-#pragma pop
-#endif
-
-
-void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
-{
-    auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
-
-    auto pointer = indexVectors->pointerGpu(type);
-    
-
-    int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
-    if( indexVectorSize == 0)
-        return;
-
-    uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
-
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-
-    WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
-    double * _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
-    dim3 _grid(int(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
-    internal_boundary_UniformGridGPU_NoSlip::boundary_UniformGridGPU_NoSlip<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
-}
-
-void UniformGridGPU_NoSlip::operator() ( IBlock * block, cudaStream_t stream  )
-{
-    run( block, IndexVectors::ALL, stream );
-}
-
-void UniformGridGPU_NoSlip::inner( IBlock * block, cudaStream_t stream  )
-{
-    run( block, IndexVectors::INNER, stream  );
-}
-
-void UniformGridGPU_NoSlip::outer( IBlock * block, cudaStream_t stream  )
-{
-    run( block, IndexVectors::OUTER, stream  );
-}
-
-
-} // namespace lbm
-} // namespace walberla
-
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h
deleted file mode 100644
index fa64a19841e63f8c1a579e420a819f5bd4644153..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h
+++ /dev/null
@@ -1,364 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\file UniformGridGPU_NoSlip.h
-//! \\author pystencils
-//======================================================================================================================
-
-
-#include "core/DataTypes.h"
-
-#include "cuda/GPUField.h"
-#include "domain_decomposition/BlockDataID.h"
-#include "domain_decomposition/IBlock.h"
-#include "blockforest/StructuredBlockForest.h"
-#include "field/FlagField.h"
-
-#include <set>
-#include <vector>
-
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
-
-namespace walberla {
-namespace lbm {
-
-
-class UniformGridGPU_NoSlip
-{
-public:
-    struct IndexInfo { 
-        int32_t x;
-        int32_t y;
-        int32_t z;
-        int32_t dir;
-        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
-        bool operator==(const IndexInfo & o) const {
-            return x == o.x && y == o.y && z == o.z && dir == o.dir;
-        }
-    };
-
-
-
-    class IndexVectors
-    {
-    public:
-        using CpuIndexVector = std::vector<IndexInfo>;
-
-        enum Type {
-            ALL = 0,
-            INNER = 1,
-            OUTER = 2,
-            NUM_TYPES = 3
-        };
-
-        IndexVectors() : cpuVectors_(NUM_TYPES)  {}
-        bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; }
-
-        ~IndexVectors() {
-            for( auto & gpuVec: gpuVectors_)
-                cudaFree( gpuVec );
-        }
-        
-
-        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
-        IndexInfo * pointerCpu(Type t)  { return &(cpuVectors_[t][0]); }
-
-        IndexInfo * pointerGpu(Type t)  { return gpuVectors_[t]; }
-        
-
-        void syncGPU()
-        {
-            gpuVectors_.resize( cpuVectors_.size() );
-            for(size_t i=0; i < size_t(NUM_TYPES); ++i )
-            {
-                auto & gpuVec = gpuVectors_[i];
-                auto & cpuVec = cpuVectors_[i];
-                cudaFree( gpuVec );
-                cudaMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() );
-                cudaMemcpy( gpuVec, &cpuVec[0], sizeof(IndexInfo) * cpuVec.size(), cudaMemcpyHostToDevice );
-            }
-        }
-
-    private:
-        std::vector<CpuIndexVector> cpuVectors_;
-
-        using GpuIndexVector = IndexInfo *;
-        std::vector<GpuIndexVector> gpuVectors_;
-        
-    };
-
-
-    UniformGridGPU_NoSlip( const shared_ptr<StructuredBlockForest> & blocks,
-                   BlockDataID pdfsID_ )
-        : pdfsID(pdfsID_)
-    {
-        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
-        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_UniformGridGPU_NoSlip");
-    };
-
-    void operator() ( IBlock * block , cudaStream_t stream = 0 );
-    void inner( IBlock * block , cudaStream_t stream = 0 );
-    void outer( IBlock * block , cudaStream_t stream = 0 );
-
-
-    template<typename FlagField_T>
-    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
-                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
-    {
-        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
-            fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
-    }
-
-
-    template<typename FlagField_T>
-    void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID,
-                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
-    {
-        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
-        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
-        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
-        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
-
-
-        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
-
-        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
-        auto domainFlag = flagField->getFlag(domainFlagUID);
-
-        auto inner = flagField->xyzSize();
-        inner.expand( cell_idx_t(-1) );
-
-
-        indexVectorAll.clear();
-        indexVectorInner.clear();
-        indexVectorOuter.clear();
-
-        for( auto it = flagField->begin(); it != flagField->end(); ++it )
-        {
-            if( ! isFlagSet(it, domainFlag) )
-                continue;
-            if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-        }
-
-        indexVectors->syncGPU();
-    }
-
-private:
-    void run( IBlock * block, IndexVectors::Type type, cudaStream_t stream = 0 );
-
-    BlockDataID indexVectorID;
-
-    BlockDataID pdfsID;
-};
-
-
-
-} // namespace lbm
-} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
deleted file mode 100644
index 27df1f0d920ad163063462c604c849578e10caee..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
+++ /dev/null
@@ -1,1656 +0,0 @@
-#include "stencil/Directions.h"
-#include "core/cell/CellInterval.h"
-#include "cuda/GPUField.h"
-#include "core/DataTypes.h"
-#include "UniformGridGPU_PackInfo.h"
-
-
-#define FUNC_PREFIX __global__
-
-
-namespace walberla {
-namespace pystencils {
-
-using walberla::cell::CellInterval;
-using walberla::stencil::Direction;
-
-
-
-namespace internal_pack_SW {
-static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_BW {
-static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_W {
-static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_TW {
-static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_NW {
-static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_BS {
-static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_S {
-static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_TS {
-static FUNC_PREFIX void pack_TS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_B {
-static FUNC_PREFIX void pack_B(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_C {
-static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_T {
-static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_BN {
-static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_N {
-static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_TN {
-static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_SE {
-static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_BE {
-static FUNC_PREFIX void pack_BE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_E {
-static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_TE {
-static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-namespace internal_pack_NE {
-static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
-   } 
-}
-}
-
-
-
-namespace internal_unpack_NE {
-static FUNC_PREFIX void unpack_NE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_TE {
-static FUNC_PREFIX void unpack_TE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_E {
-static FUNC_PREFIX void unpack_E(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
-   } 
-}
-}
-
-namespace internal_unpack_BE {
-static FUNC_PREFIX void unpack_BE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_SE {
-static FUNC_PREFIX void unpack_SE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_TN {
-static FUNC_PREFIX void unpack_TN(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_N {
-static FUNC_PREFIX void unpack_N(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
-   } 
-}
-}
-
-namespace internal_unpack_BN {
-static FUNC_PREFIX void unpack_BN(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_T {
-static FUNC_PREFIX void unpack_T(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
-   } 
-}
-}
-
-namespace internal_unpack_C {
-static FUNC_PREFIX void unpack_C(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
-      _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_B {
-static FUNC_PREFIX void unpack_B(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
-   } 
-}
-}
-
-namespace internal_unpack_TS {
-static FUNC_PREFIX void unpack_TS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_S {
-static FUNC_PREFIX void unpack_S(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
-   } 
-}
-}
-
-namespace internal_unpack_BS {
-static FUNC_PREFIX void unpack_BS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_NW {
-static FUNC_PREFIX void unpack_NW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_TW {
-static FUNC_PREFIX void unpack_TW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_W {
-static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
-   } 
-}
-}
-
-namespace internal_unpack_BW {
-static FUNC_PREFIX void unpack_BW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-namespace internal_unpack_SW {
-static FUNC_PREFIX void unpack_SW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
-   {
-      const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
-      const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
-      const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
-   } 
-}
-}
-
-
-
-
-void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, IBlock * block, cudaStream_t stream)
-{
-    double * buffer = reinterpret_cast<double*>(byte_buffer);
-
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-
-    CellInterval ci;
-    pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false);
-
-    switch( dir )
-    {
-        case stencil::SW:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BW:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::W:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TW:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::NW:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BS:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::S:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TS:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::B:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::C:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_C::pack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2);
-            break;
-        }
-        
-        case stencil::T:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BN:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::N:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TN:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::SE:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BE:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BE::pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::E:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TE:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::NE:
-        {
-            double * _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-
-        default:
-            WALBERLA_ASSERT(false);
-    }
-}
-
-
-void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer, IBlock * block, cudaStream_t stream)
-{
-    double * buffer = reinterpret_cast<double*>(byte_buffer);
-
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-
-    CellInterval ci;
-    pdfs->getGhostRegion(dir, ci, 1, false);
-
-    switch( dir )
-    {
-        case stencil::NE:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TE:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::E:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BE:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::SE:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TN:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::N:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BN:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::T:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::C:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_C::unpack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2);
-            break;
-        }
-        
-        case stencil::B:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TS:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::S:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BS:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::NW:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::TW:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::W:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::BW:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-        case stencil::SW:
-        {
-            double * const _data_buffer = buffer;
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-            const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-            const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-            const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-            dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
-            dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
-            break;
-        }
-        
-
-        default:
-            WALBERLA_ASSERT(false);
-    }
-}
-
-
-uint_t UniformGridGPU_PackInfo::size(stencil::Direction dir, IBlock * block)
-{
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-
-    CellInterval ci;
-    pdfs->getGhostRegion(dir, ci, 1, false);
-
-    uint_t elementsPerCell = 0;
-
-    switch( dir )
-    {
-        case stencil::SW:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::BW:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::W:
-            elementsPerCell = 5;
-            break;
-        
-        case stencil::TW:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::NW:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::BS:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::S:
-            elementsPerCell = 5;
-            break;
-        
-        case stencil::TS:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::B:
-            elementsPerCell = 5;
-            break;
-        
-        case stencil::C:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::T:
-            elementsPerCell = 5;
-            break;
-        
-        case stencil::BN:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::N:
-            elementsPerCell = 5;
-            break;
-        
-        case stencil::TN:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::SE:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::BE:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::E:
-            elementsPerCell = 5;
-            break;
-        
-        case stencil::TE:
-            elementsPerCell = 1;
-            break;
-        
-        case stencil::NE:
-            elementsPerCell = 1;
-            break;
-        
-        default:
-            elementsPerCell = 0;
-    }
-    return ci.numCells() * elementsPerCell * sizeof( double );
-}
-
-
-
-} // namespace pystencils
-} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h
deleted file mode 100644
index c68a7b063fd2585cead948d771e7f8e012fccbda..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#include "stencil/Directions.h"
-#include "core/cell/CellInterval.h"
-#include "cuda/GPUField.h"
-#include "core/DataTypes.h"
-#include "domain_decomposition/IBlock.h"
-#include "cuda/communication/GeneratedGPUPackInfo.h"
-
-
-#define FUNC_PREFIX __global__
-
-
-namespace walberla {
-namespace pystencils {
-
-
-class UniformGridGPU_PackInfo : public ::walberla::cuda::GeneratedGPUPackInfo
-{
-public:
-    UniformGridGPU_PackInfo( BlockDataID pdfsID_ )
-        : pdfsID(pdfsID_)
-    {};
-    virtual ~UniformGridGPU_PackInfo() {}
-
-    virtual void pack  (stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
-    virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
-    virtual uint_t size  (stencil::Direction dir, IBlock * block);
-
-private:
-    BlockDataID pdfsID;
-};
-
-
-} // namespace pystencils
-} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
deleted file mode 100644
index b6fcbbe3cadcf4093451d091f60c5dae181f0db5..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
+++ /dev/null
@@ -1,132 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\file UniformGridGPU_UBB.cpp
-//! \\ingroup lbm
-//! \\author lbmpy
-//======================================================================================================================
-
-#include <cmath>
-
-#include "core/DataTypes.h"
-#include "core/Macros.h"
-#include "UniformGridGPU_UBB.h"
-#include "cuda/ErrorChecking.h"
-
-
-#define FUNC_PREFIX __global__
-
-using namespace std;
-
-namespace walberla {
-namespace lbm {
-
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wstrict-aliasing"
-#pragma GCC diagnostic ignored "-Wunused-variable"
-#pragma GCC diagnostic ignored "-Wconversion"
-#endif
-
-#ifdef __CUDACC__
-#pragma push
-#pragma diag_suppress = declared_but_not_referenced
-#endif
-
-
-namespace internal_boundary_UniformGridGPU_UBB {
-static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
-{
-   if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
-   {
-      uint8_t * const _data_indexVector_10 = _data_indexVector;
-      const int32_t x = *((int32_t *)(& _data_indexVector_10[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      uint8_t * const _data_indexVector_14 = _data_indexVector + 4;
-      const int32_t y = *((int32_t *)(& _data_indexVector_14[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      uint8_t * const _data_indexVector_18 = _data_indexVector + 8;
-      const int32_t z = *((int32_t *)(& _data_indexVector_18[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      
-      
-      const int64_t cx [] = { 0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1 };
-      const int64_t cy [] = { 0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0 };
-      const int64_t cz [] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 };
-      const int invdir [] = { 0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13 };
-      
-      
-      const double weights [] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
-      
-      uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
-      const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
-      double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
-      _data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x];
-   } 
-}
-}
-
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-
-#ifdef __CUDACC__
-#pragma pop
-#endif
-
-
-void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
-{
-    auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
-
-    auto pointer = indexVectors->pointerGpu(type);
-    
-
-    int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
-    if( indexVectorSize == 0)
-        return;
-
-    uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
-
-    auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
-
-    WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
-    double * _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
-    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
-    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
-    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-    const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
-    dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
-    dim3 _grid(int(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
-    internal_boundary_UniformGridGPU_UBB::boundary_UniformGridGPU_UBB<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
-}
-
-void UniformGridGPU_UBB::operator() ( IBlock * block, cudaStream_t stream  )
-{
-    run( block, IndexVectors::ALL, stream );
-}
-
-void UniformGridGPU_UBB::inner( IBlock * block, cudaStream_t stream  )
-{
-    run( block, IndexVectors::INNER, stream  );
-}
-
-void UniformGridGPU_UBB::outer( IBlock * block, cudaStream_t stream  )
-{
-    run( block, IndexVectors::OUTER, stream  );
-}
-
-
-} // namespace lbm
-} // namespace walberla
-
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h
deleted file mode 100644
index 0b0017759462d2120752acf7d2db2b85ec85b7b9..0000000000000000000000000000000000000000
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h
+++ /dev/null
@@ -1,364 +0,0 @@
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \\file UniformGridGPU_UBB.h
-//! \\author pystencils
-//======================================================================================================================
-
-
-#include "core/DataTypes.h"
-
-#include "cuda/GPUField.h"
-#include "domain_decomposition/BlockDataID.h"
-#include "domain_decomposition/IBlock.h"
-#include "blockforest/StructuredBlockForest.h"
-#include "field/FlagField.h"
-
-#include <set>
-#include <vector>
-
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
-
-namespace walberla {
-namespace lbm {
-
-
-class UniformGridGPU_UBB
-{
-public:
-    struct IndexInfo { 
-        int32_t x;
-        int32_t y;
-        int32_t z;
-        int32_t dir;
-        IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
-        bool operator==(const IndexInfo & o) const {
-            return x == o.x && y == o.y && z == o.z && dir == o.dir;
-        }
-    };
-
-
-
-    class IndexVectors
-    {
-    public:
-        using CpuIndexVector = std::vector<IndexInfo>;
-
-        enum Type {
-            ALL = 0,
-            INNER = 1,
-            OUTER = 2,
-            NUM_TYPES = 3
-        };
-
-        IndexVectors() : cpuVectors_(NUM_TYPES)  {}
-        bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; }
-
-        ~IndexVectors() {
-            for( auto & gpuVec: gpuVectors_)
-                cudaFree( gpuVec );
-        }
-        
-
-        CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
-        IndexInfo * pointerCpu(Type t)  { return &(cpuVectors_[t][0]); }
-
-        IndexInfo * pointerGpu(Type t)  { return gpuVectors_[t]; }
-        
-
-        void syncGPU()
-        {
-            gpuVectors_.resize( cpuVectors_.size() );
-            for(size_t i=0; i < size_t(NUM_TYPES); ++i )
-            {
-                auto & gpuVec = gpuVectors_[i];
-                auto & cpuVec = cpuVectors_[i];
-                cudaFree( gpuVec );
-                cudaMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() );
-                cudaMemcpy( gpuVec, &cpuVec[0], sizeof(IndexInfo) * cpuVec.size(), cudaMemcpyHostToDevice );
-            }
-        }
-
-    private:
-        std::vector<CpuIndexVector> cpuVectors_;
-
-        using GpuIndexVector = IndexInfo *;
-        std::vector<GpuIndexVector> gpuVectors_;
-        
-    };
-
-
-    UniformGridGPU_UBB( const shared_ptr<StructuredBlockForest> & blocks,
-                   BlockDataID pdfsID_ )
-        : pdfsID(pdfsID_)
-    {
-        auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
-        indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_UniformGridGPU_UBB");
-    };
-
-    void operator() ( IBlock * block , cudaStream_t stream = 0 );
-    void inner( IBlock * block , cudaStream_t stream = 0 );
-    void outer( IBlock * block , cudaStream_t stream = 0 );
-
-
-    template<typename FlagField_T>
-    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
-                            FlagUID boundaryFlagUID, FlagUID domainFlagUID)
-    {
-        for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
-            fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
-    }
-
-
-    template<typename FlagField_T>
-    void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID,
-                            FlagUID boundaryFlagUID, FlagUID domainFlagUID )
-    {
-        auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
-        auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
-        auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
-        auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
-
-
-        auto * flagField = block->getData< FlagField_T > ( flagFieldID );
-
-        auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
-        auto domainFlag = flagField->getFlag(domainFlagUID);
-
-        auto inner = flagField->xyzSize();
-        inner.expand( cell_idx_t(-1) );
-
-
-        indexVectorAll.clear();
-        indexVectorInner.clear();
-        indexVectorOuter.clear();
-
-        for( auto it = flagField->begin(); it != flagField->end(); ++it )
-        {
-            if( ! isFlagSet(it, domainFlag) )
-                continue;
-            if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  0 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  1 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  2 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  3 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  4 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  5 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  6 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  7 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  8 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  9 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  10 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  11 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  12 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  13 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  14 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  15 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  16 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  17 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-            if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
-            {
-                auto element = IndexInfo(it.x(), it.y(),  it.z(),  18 );
-                indexVectorAll.push_back( element );
-                if( inner.contains( it.x(), it.y(), it.z() ) )
-                    indexVectorInner.push_back( element );
-                else
-                    indexVectorOuter.push_back( element );
-            }
-            
-        }
-
-        indexVectors->syncGPU();
-    }
-
-private:
-    void run( IBlock * block, IndexVectors::Type type, cudaStream_t stream = 0 );
-
-    BlockDataID indexVectorID;
-
-    BlockDataID pdfsID;
-};
-
-
-
-} // namespace lbm
-} // namespace walberla
\ No newline at end of file
diff --git a/cmake/waLBerlaFunctions.cmake b/cmake/waLBerlaFunctions.cmake
index 196d3f4777ad4b4efd6d688f5c99361c107ef2a6..d62c6824b202820eabda59bc3bd61d463ac6f31e 100644
--- a/cmake/waLBerlaFunctions.cmake
+++ b/cmake/waLBerlaFunctions.cmake
@@ -245,6 +245,23 @@ endfunction ( waLBerla_add_executable )
 
 
 
+#######################################################################################################################
+#
+# Function to tell CMake which C/C++/CUDA files are generated by a python file
+#
+# Example:
+#    waLBerla_python_file_generates(MyPythonCodeGenScript.py Sweep1.cpp Sweep1.h Sweep2.h Sweep2.cu)
+#
+# Stores the file list in the internal cache variable WALBERLA_CODEGEN_INFO_<absolute path of the python file>.
+#######################################################################################################################
+function( waLBerla_python_file_generates pythonFile )
+    get_filename_component(pythonFileAbsolutePath ${pythonFile} ABSOLUTE)
+    set( "WALBERLA_CODEGEN_INFO_${pythonFileAbsolutePath}" ${ARGN}
+            CACHE INTERNAL "Files generated by python script ${pythonFile}" FORCE)
+endfunction(waLBerla_python_file_generates)
+
+
+
 #######################################################################################################################
 #
 # Adds a  waLBerla module test executable.
diff --git a/cmake/waLBerlaHelperFunctions.cmake b/cmake/waLBerlaHelperFunctions.cmake
index 4d1dafe0e5ce02f35d55151550750fcf3dcdcef2..5268c2ab1b2c20071ea31658faf72cebef738a1d 100644
--- a/cmake/waLBerlaHelperFunctions.cmake
+++ b/cmake/waLBerlaHelperFunctions.cmake
@@ -40,11 +40,10 @@ function( handle_python_codegen sourceFilesOut generatedSourceFilesOut generator
         if( ${sourceFile} MATCHES ".*\\.py$" )
             set(codeGenRequired YES)
             if( WALBERLA_BUILD_WITH_CODEGEN)
-                execute_process(COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} -l
-                            OUTPUT_VARIABLE generatedSourceFiles)
-                string(REGEX REPLACE "\n$" "" generatedSourceFiles "${generatedSourceFiles}")
+                get_filename_component(pythonFileAbsolutePath ${sourceFile} ABSOLUTE )
+                set( generatedSourceFiles ${WALBERLA_CODEGEN_INFO_${pythonFileAbsolutePath}} )
 
-                set(generatedWithAbsolutePath )
+                set( generatedWithAbsolutePath )
                 foreach( filename ${generatedSourceFiles} )
                     list(APPEND generatedWithAbsolutePath ${CMAKE_CURRENT_BINARY_DIR}/${filename})
                 endforeach()
@@ -52,9 +51,19 @@ function( handle_python_codegen sourceFilesOut generatedSourceFilesOut generator
                 list(APPEND generatedResult  ${generatedWithAbsolutePath} )
                 list(APPEND generatorsResult ${sourceFile} )
 
+                string (REPLACE ";" "\", \"" jsonFileList "${generatedWithAbsolutePath}" )
+                set(pythonParameters
+                        "{\"EXPECTED_FILES\": [\"${jsonFileList}\"], \"CMAKE_VARS\" : {  "
+                            "\"WALBERLA_OPTIMIZE_FOR_LOCALHOST\": \"${WALBERLA_OPTIMIZE_FOR_LOCALHOST}\","
+                            "\"WALBERLA_DOUBLE_ACCURACY\": \"${WALBERLA_DOUBLE_ACCURACY}\","
+                            "\"WALBERLA_BUILD_WITH_MPI\": \"${WALBERLA_BUILD_WITH_MPI}\","
+                            "\"WALBERLA_BUILD_WITH_OPENMP\": \"${WALBERLA_BUILD_WITH_OPENMP}\" } }"
+                        )
+                string(REPLACE "\"" "\\\"" pythonParameters ${pythonParameters})   # even one more quoting level required
+                string(REPLACE "\n" "" pythonParameters ${pythonParameters})  # remove newline characters
                 add_custom_command(OUTPUT ${generatedWithAbsolutePath}
                                    DEPENDS ${sourceFile}
-                                   COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} -g
+                                   COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} ${pythonParameters}
                                    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
                 include_directories(${CMAKE_CURRENT_BINARY_DIR})
             endif()
diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt
index dac2f4a1c3a34e54fbeaa6eedfd894d65be0acce..11213e0111459bdc2aaf9bdeb719a37e3cb255a8 100644
--- a/tests/cuda/CMakeLists.txt
+++ b/tests/cuda/CMakeLists.txt
@@ -19,6 +19,10 @@ waLBerla_execute_test( NAME  SimpleKernelTest )
 waLBerla_compile_test( FILES FieldIndexing3DTest.cpp FieldIndexing3DTest.cu )
 waLBerla_execute_test( NAME  FieldIndexing3DTest )
 
+
+waLBerla_python_file_generates(codegen/CudaJacobiKernel.py
+        CudaJacobiKernel2D.cu CudaJacobiKernel2D.h
+        CudaJacobiKernel3D.cu CudaJacobiKernel3D.h)
 waLBerla_compile_test( FILES codegen/CodegenJacobiGPU.cpp
                              codegen/CudaJacobiKernel.py
                        DEPENDS blockforest timeloop gui )
@@ -34,8 +38,8 @@ waLBerla_compile_test( FILES CudaMPI DEPENDS blockforest timeloop gui )
 
 waLBerla_compile_test( FILES AlignmentTest.cpp DEPENDS blockforest timeloop )
 
-waLBerla_compile_test( FILES codegen/MicroBenchmarkGpuLbm.cpp codegen/MicroBenchmarkGpuLbm.py)
 
-waLBerla_add_executable ( NAME CpuGpuGeneratedEquivalenceTest
-                          FILES codegen/EquivalenceTest.cpp codegen/EquivalenceTest.gen.py
-                          DEPENDS blockforest boundary core cuda field stencil timeloop vtk gui )
+waLBerla_python_file_generates(codegen/MicroBenchmarkGpuLbm.py
+        MicroBenchmarkStreamKernel.cu MicroBenchmarkStreamKernel.h
+        MicroBenchmarkCopyKernel.cu MicroBenchmarkCopyKernel.h)
+waLBerla_compile_test( FILES codegen/MicroBenchmarkGpuLbm.cpp codegen/MicroBenchmarkGpuLbm.py)
diff --git a/tests/cuda/codegen/CudaJacobiKernel.py b/tests/cuda/codegen/CudaJacobiKernel.py
index 14e46d2b6ef017b8728b0106e480c3e56301ac8e..7ec84032a5c7941ddfee67f1484a08dca2014193 100644
--- a/tests/cuda/codegen/CudaJacobiKernel.py
+++ b/tests/cuda/codegen/CudaJacobiKernel.py
@@ -1,20 +1,26 @@
-from pystencils_walberla.sweep import Sweep
+import sympy as sp
+import pystencils as ps
+from pystencils_walberla import CodeGeneration, generate_sweep
 
 
-def jacobi2D(sweep):
-    src = sweep.field("f1")
-    dst = sweep.temporary_field(src)
+with CodeGeneration() as ctx:
+    h = sp.symbols("h")
 
-    dst[0, 0] @= (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / (4 * sweep.constant("h") ** 2)
+    # ----- Jacobi 2D - neighbour weights as nested list; the factor is multiplied, hence 1 / (4 h^2) ----
+    src, dst = ps.fields("src, src_tmp: [2D]")
+    stencil = [[0, 1, 0],
+               [1, 0, 1],
+               [0, 1, 0]]
+    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h**2))
+    generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu")
 
+    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
+    src, dst = ps.fields("src, src_tmp: [3D]")
 
-def jacobi3D(sweep):
-    src = sweep.field("f1")
-    dst = sweep.temporary_field(src)
+    @ps.kernel
+    def kernel_func():
+        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
+                         src[0, 1, 0] + src[0, -1, 0] +
+                         src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
 
-    dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0, -1]) / \
-                    (6 * sweep.constant("h") ** 2)
-
-
-Sweep.generate('CudaJacobiKernel2D', jacobi2D, dim=2, target='gpu')
-Sweep.generate('CudaJacobiKernel3D', jacobi3D, dim=3, target='gpu')
+    generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu")
diff --git a/tests/cuda/codegen/EquivalenceTest.cpp b/tests/cuda/codegen/EquivalenceTest.cpp
deleted file mode 100644
index 75bbd273ef043a2b3efecc9c17256d11fec2ad29..0000000000000000000000000000000000000000
--- a/tests/cuda/codegen/EquivalenceTest.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-#include "core/Environment.h"
-#include "python_coupling/CreateConfig.h"
-#include "blockforest/Initialization.h"
-#include "lbm/field/PdfField.h"
-#include "lbm/field/AddToStorage.h"
-#include "field/FlagField.h"
-#include "field/AddToStorage.h"
-#include "lbm/communication/PdfFieldPackInfo.h"
-#include "lbm/PerformanceLogger.h"
-#include "blockforest/communication/UniformBufferedScheme.h"
-#include "timeloop/all.h"
-#include "core/math/Random.h"
-#include "geometry/all.h"
-#include "cuda/HostFieldAllocator.h"
-#include "cuda/communication/GPUPackInfo.h"
-#include "core/timing/TimingPool.h"
-#include "core/timing/RemainingTimeLogger.h"
-#include "cuda/AddGPUFieldToStorage.h"
-#include "cuda/communication/UniformGPUScheme.h"
-#include "lbm/sweeps/CellwiseSweep.h"
-#include "domain_decomposition/SharedSweep.h"
-
-#include "EquivalenceTest_LatticeModel.h"
-#include "EquivalenceTest_GPUKernel.h"
-#include "EquivalenceTest_GPUPackInfo.h"
-
-using namespace walberla;
-
-using NativeLatticeModel_T = lbm::D3Q19<lbm::collision_model::SRT, false>;
-using GeneratedLatticeModel_T = lbm::EquivalenceTest_LatticeModel;
-
-using Stencil_T = GeneratedLatticeModel_T::Stencil;
-using CommunicationStencil_T = GeneratedLatticeModel_T::CommunicationStencil;
-using NativePdfField_T = lbm::PdfField<NativeLatticeModel_T>;
-using GeneratedPdfField_T = lbm::PdfField<GeneratedLatticeModel_T>;
-
-using flag_t = walberla::uint8_t;
-using FlagField_T = FlagField<flag_t>;
-
-using CpuCommScheme_T = blockforest::communication::UniformBufferedScheme<CommunicationStencil_T>;
-using GpuCommScheme_T = cuda::communication::UniformGPUScheme<CommunicationStencil_T>;
-
-
-template<typename PdfField_T>
-void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfFieldId )
-{
-   auto domainBB = blocks->getDomainCellBB();
-
-   for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
-   {
-      auto pdfField = blockIt->getData<PdfField_T>( pdfFieldId );
-      Cell offset( 0, 0, 0 );
-      blocks->transformBlockLocalToGlobalCell( offset, *blockIt );
-
-      WALBERLA_FOR_ALL_CELLS_XYZ( pdfField,
-         auto globalX = real_c( offset[0] + x );
-         auto globalZ = real_c( offset[2] + z );
-         auto xArg = real_c(std::sin(real_c(globalX) / real_t(4) * real_c(domainBB.size(0)) ));
-         auto zArg = real_c(std::sin(real_c(globalZ) / real_t(4) * real_c(domainBB.size(2)) ));
-         pdfField->setToEquilibrium( x, y, z, Vector3<real_t>( real_t(0.05) * std::sin(xArg), 0,
-                                                               real_t(0.05) * std::cos(zArg)));
-      );
-   }
-}
-
-
-int main( int argc, char **argv )
-{
-   mpi::Environment env( argc, argv );
-
-   for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
-   {
-      auto config = *cfg;
-      auto parameters = config->getOneBlock( "Parameters" );
-
-      auto blocks = blockforest::createUniformBlockGridFromConfig( config );
-
-      const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
-      const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 ));
-
-      // Boundary
-      BlockDataID flagFieldId = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field" );
-      const FlagUID fluidFlagUID( "Fluid" );
-      geometry::setNonBoundaryCellsToDomain<FlagField_T>( *blocks, flagFieldId, fluidFlagUID );
-      GeneratedLatticeModel_T generatedLatticeModel = GeneratedLatticeModel_T( omega );
-
-
-      // Part 1 : Native walberla
-      NativeLatticeModel_T nativeLatticeModel = NativeLatticeModel_T( lbm::collision_model::SRT( omega ));
-      BlockDataID pdfFieldNativeId = lbm::addPdfFieldToStorage( blocks, "pdfNative", nativeLatticeModel, field::fzyx );
-      initPdfField<NativePdfField_T >( blocks, pdfFieldNativeId );
-      CpuCommScheme_T nativeComm( blocks );
-      nativeComm.addPackInfo( make_shared< lbm::PdfFieldPackInfo< NativeLatticeModel_T > >( pdfFieldNativeId ) );
-      auto nativeSweep = lbm::makeCellwiseSweep< NativeLatticeModel_T , FlagField_T >( pdfFieldNativeId, flagFieldId, fluidFlagUID );
-
-      SweepTimeloop nativeTimeLoop( blocks->getBlockStorage(), timesteps );
-      nativeTimeLoop.add() << BeforeFunction( nativeComm, "communication" )
-                           << Sweep(makeSharedSweep(nativeSweep), "native stream collide" );
-      nativeTimeLoop.run();
-
-
-      // Part 2: Generated CPU Version
-      BlockDataID pdfFieldGeneratedId = lbm::addPdfFieldToStorage( blocks, "pdfGenerated", generatedLatticeModel, field::fzyx );
-      initPdfField<GeneratedPdfField_T >( blocks, pdfFieldGeneratedId );
-      CpuCommScheme_T cpuComm( blocks );
-      cpuComm.addPackInfo( make_shared< lbm::PdfFieldPackInfo< GeneratedLatticeModel_T > >( pdfFieldGeneratedId ) );
-      SweepTimeloop cpuTimeLoop( blocks->getBlockStorage(), timesteps );
-      cpuTimeLoop.add() << BeforeFunction( cpuComm, "communication" )
-                        << Sweep(GeneratedLatticeModel_T::Sweep( pdfFieldGeneratedId ), "generated stream collide on cpu" );
-      cpuTimeLoop.run();
-
-
-      // Part 3: Generated GPU Version
-      bool overlapCommunication = parameters.getParameter<bool>( "overlapCommunication", true );
-      bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
-      bool oldCommunication = parameters.getParameter<bool>( "oldCommunication", false );
-
-      BlockDataID pdfShadowCPU = lbm::addPdfFieldToStorage( blocks, "cpu shadow field", generatedLatticeModel, field::fzyx );
-      initPdfField<GeneratedPdfField_T >( blocks, pdfShadowCPU );
-
-      BlockDataID pdfGpuFieldId = cuda::addGPUFieldToStorage<GeneratedPdfField_T >( blocks, pdfShadowCPU, "pdfs on gpu", true );
-      auto defaultKernelStream = overlapCommunication ? cuda::StreamRAII::newStream() : cuda::StreamRAII::defaultStream();
-      auto innerKernelStartedEvent = make_shared<cuda::EventRAII>();
-
-      pystencils::EquivalenceTest_GPUKernel cudaLbKernel( pdfGpuFieldId, omega, defaultKernelStream );
-      GpuCommScheme_T gpuComm( blocks, innerKernelStartedEvent, cudaEnabledMPI );
-      gpuComm.addPackInfo( make_shared<pystencils::EquivalenceTest_GPUPackInfo>( pdfGpuFieldId ));
-      auto runCommunication = [&]() { gpuComm(); };
-
-      CpuCommScheme_T oldGpuScheme( blocks );
-
-      std::vector<cudaStream_t > streams;
-      for(uint_t i=0; i < Stencil_T::Size; ++i ) {
-         cudaStream_t s;
-         cudaStreamCreate(&s);
-         streams.push_back(s);
-      }
-      using OldPackInfo = cuda::communication::GPUPackInfo<cuda::GPUField<real_t> >;
-      oldGpuScheme.addPackInfo( make_shared<OldPackInfo>(pdfGpuFieldId, streams) );
-
-
-      SweepTimeloop gpuTimeLoop( blocks->getBlockStorage(), timesteps );
-      if( !overlapCommunication )
-      {
-         gpuTimeLoop.add() << (oldCommunication ? BeforeFunction(oldGpuScheme) :
-                                                  BeforeFunction( runCommunication, "gpu communication" ))
-                           << Sweep( cudaLbKernel, "LB stream & collide gpu" );
-      }
-      else
-      {
-         gpuTimeLoop.add() << Sweep( [&]( IBlock *b )
-                                  {
-                                     cudaEventRecord( *innerKernelStartedEvent, defaultKernelStream );
-                                     cudaLbKernel.inner( b );
-                                  }, "LBM @ inner" );
-         gpuTimeLoop.add() << BeforeFunction( runCommunication, "gpu communication" )
-                           << Sweep( [&]( IBlock *b ) { cudaLbKernel.outer( b ); }, "LBM @ outer" );
-      }
-      gpuTimeLoop.run();
-      cuda::fieldCpy<GeneratedPdfField_T, cuda::GPUField<real_t>> (blocks, pdfShadowCPU, pdfGpuFieldId);
-
-      // Compare all three versions
-      auto errorCPU = real_t(0);
-      auto errorGPU = real_t(0);
-
-      for( auto & block : *blocks )
-      {
-         auto native = block.getData<NativePdfField_T>( pdfFieldNativeId );
-         auto cpu = block.getData<GeneratedPdfField_T >( pdfFieldGeneratedId );
-         auto gpu = block.getData<GeneratedPdfField_T>( pdfShadowCPU );
-
-         WALBERLA_FOR_ALL_CELLS_XYZ(native,
-            for(cell_idx_t f = 0; f < cell_idx_c(NativeLatticeModel_T::Stencil::Q); ++f )
-            {
-               errorCPU += std::abs( native->get( x, y, z, f ) - cpu->get( x, y, z, f ));
-               errorGPU += std::abs( native->get( x, y, z, f ) - gpu->get( x, y, z, f ));
-            }
-         )
-      }
-      mpi::reduceInplace(errorCPU, mpi::SUM);
-      mpi::reduceInplace(errorGPU, mpi::SUM);
-      auto domainBB = blocks->getDomainCellBB();
-      errorCPU /= real_c(domainBB.numCells());
-      errorGPU /= real_c(domainBB.numCells());
-      WALBERLA_LOG_RESULT_ON_ROOT("CPU Error " << errorCPU );
-      WALBERLA_LOG_RESULT_ON_ROOT("GPU Error " << errorGPU );
-      WALBERLA_CHECK_FLOAT_EQUAL(errorCPU, real_c(0.0));
-      WALBERLA_CHECK_FLOAT_EQUAL(errorGPU, real_c(0.0));
-   }
-
-   return 0;
-}
\ No newline at end of file
diff --git a/tests/cuda/codegen/EquivalenceTest.gen.py b/tests/cuda/codegen/EquivalenceTest.gen.py
deleted file mode 100644
index 43140ca53ee3396456d6c17d591b4e1cf0e2deb0..0000000000000000000000000000000000000000
--- a/tests/cuda/codegen/EquivalenceTest.gen.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import sympy as sp
-from lbmpy_walberla import generate_lattice_model_files
-from lbmpy.creationfunctions import create_lb_update_rule
-from pystencils_walberla.sweep import Sweep
-
-dtype = 'float64'
-
-# LB options
-options = {
-    'method': 'srt',
-    'stencil': 'D3Q19',
-    'relaxation_rate': sp.Symbol("omega"),
-    'field_name': 'pdfs',
-    'compressible': False,
-    'maxwellian_moments': False,
-    'temporary_field_name': 'pdfs_tmp',
-    'optimization': {'cse_global': True,
-                     'cse_pdfs': True,
-                     'double_precision': dtype == 'float64'}
-}
-
-# GPU optimization options
-opt =       {'gpu_indexing_params': {'block_size': (128, 1, 1)},  'data_type': dtype}
-outer_opt = {'gpu_indexing_params': {'block_size': (32, 32, 32)}, 'data_type': dtype}
-
-
-def lb_assignments():
-    ur = create_lb_update_rule(**options)
-    return ur.all_assignments
-
-
-generate_lattice_model_files(class_name='EquivalenceTest_LatticeModel', **options)
-
-Sweep.generate_inner_outer_kernel('EquivalenceTest_GPUKernel',
-                                  lambda: create_lb_update_rule(**options).all_assignments,
-                                  target='gpu',
-                                  temporary_fields=['pdfs_tmp'],
-                                  field_swaps=[('pdfs', 'pdfs_tmp')],
-                                  optimization=opt,
-                                  outer_optimization=outer_opt)
-
-Sweep.generate_pack_info('EquivalenceTest_GPUPackInfo', lb_assignments, target='gpu')
diff --git a/tests/cuda/codegen/MicroBenchmarkGpuLbm.py b/tests/cuda/codegen/MicroBenchmarkGpuLbm.py
index b722f0510b95e9b8e01d6bddd5947ef97f2a74c9..298727b46c428384eeef7f755e8bfe4881d53d60 100644
--- a/tests/cuda/codegen/MicroBenchmarkGpuLbm.py
+++ b/tests/cuda/codegen/MicroBenchmarkGpuLbm.py
@@ -1,27 +1,22 @@
 import pystencils as ps
-from pystencils_walberla.sweep import Sweep
 from lbmpy.updatekernels import create_stream_pull_only_kernel
 from lbmpy.stencils import get_stencil
+from pystencils_walberla import CodeGeneration, generate_sweep
 
-dtype = 'float64'
-f_size = 19
+with CodeGeneration() as ctx:
+    f_size = 19
+    dtype = 'float64' if ctx.double_accuracy else 'float32'
 
-
-def copy_only():
+    # Copy sweep
     src, dst = ps.fields("src({f_size}), dst({f_size}) : {dtype}[3D]".format(dtype=dtype, f_size=f_size),
                          layout='fzyx')
-    return [ps.Assignment(dst(i), src(i)) for i in range(f_size)]
-
+    copy_only = [ps.Assignment(dst(i), src(i)) for i in range(f_size)]
+    generate_sweep(ctx, 'MicroBenchmarkCopyKernel', copy_only,
+                   target='gpu', gpu_indexing_params={'block_size': (128, 1, 1)})
 
-def stream_only():
+    # Stream-only sweep
     stencil = get_stencil("D3Q19")
-    return create_stream_pull_only_kernel(stencil, src_field_name='src',
-                                          dst_field_name='dst',
-                                          generic_field_type=dtype,
-                                          generic_layout='fzyx')
-
-
-opt = {'gpu_indexing_params': {'block_size': (128, 1, 1)}, 'data_type': dtype}
-
-Sweep.generate_from_equations('MicroBenchmarkCopyKernel', copy_only, target='gpu', optimization=opt)
-Sweep.generate_from_equations('MicroBenchmarkStreamKernel', stream_only, target='gpu', optimization=opt)
+    stream_only = create_stream_pull_only_kernel(stencil, src_field_name='src', dst_field_name='dst',
+                                                 generic_field_type=dtype, generic_layout='fzyx')
+    generate_sweep(ctx, 'MicroBenchmarkStreamKernel', stream_only,
+                   target='gpu', gpu_indexing_params={'block_size': (128, 1, 1)})
diff --git a/tests/field/CMakeLists.txt b/tests/field/CMakeLists.txt
index e22e9fe331e54008f42d328b799123c3ea5b3960..57114a6d1acda0386de2cdf7da480b707f244624 100644
--- a/tests/field/CMakeLists.txt
+++ b/tests/field/CMakeLists.txt
@@ -60,6 +60,9 @@ endif( WALBERLA_BUILD_WITH_MPI )
 
 # CodeGen Tests
 
+waLBerla_python_file_generates(codegen/JacobiKernel.py
+                                  JacobiKernel2D.cpp JacobiKernel2D.h
+                                  JacobiKernel3D.cpp JacobiKernel3D.h)
 waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp codegen/JacobiKernel.py
                        DEPENDS gui timeloop )
 waLBerla_execute_test( NAME CodegenJacobiCPU )
diff --git a/tests/field/codegen/JacobiKernel.py b/tests/field/codegen/JacobiKernel.py
index bcdc4c72e5f3999fb66ccc98bc827fbeb9991eac..b375d5447c4009a0527580853c242c545a6a2e71 100644
--- a/tests/field/codegen/JacobiKernel.py
+++ b/tests/field/codegen/JacobiKernel.py
@@ -1,16 +1,26 @@
-from pystencils_walberla.sweep import Sweep
+import sympy as sp
+import pystencils as ps
+from pystencils_walberla import CodeGeneration, generate_sweep
 
-def jacobi2D(sweep):
-    src = sweep.field("f1")
-    dst = sweep.temporaryField(src)
 
-    dst[0, 0] @= (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / (4 * S.h ** 2)
+with CodeGeneration() as ctx:
+    h = sp.symbols("h")
 
-def jacobi3D(sweep):
-    src = sweep.field("f1")
-    dst = sweep.temporaryField(src)
+    # ----- Jacobi 2D - neighbour weights as nested list; the factor is multiplied, hence 1 / (4 h^2) ----
+    src, dst = ps.fields("src, src_tmp: [2D]")
+    stencil = [[0, 1, 0],
+               [1, 0, 1],
+               [0, 1, 0]]
+    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h**2))
+    generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])
 
-    dst[0,0,0] @= (src[1,0,0] + src[-1,0,0] + src[0,1,0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0 , -1] ) / (6 * S.h**2)
+    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
+    src, dst = ps.fields("src, src_tmp: [3D]")
 
-Sweep.generate('JacobiKernel2D', jacobi2D, dim=2)
-Sweep.generate('JacobiKernel3D', jacobi3D, dim=3)
\ No newline at end of file
+    @ps.kernel
+    def kernel_func():
+        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
+                         src[0, 1, 0] + src[0, -1, 0] +
+                         src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
+
+    generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)])
diff --git a/tests/lbm/CMakeLists.txt b/tests/lbm/CMakeLists.txt
index 6593cbabf44ae16398df21d79048d637bcaa47aa..795636733fa71b846d3e954429169e6f1e0c28a1 100644
--- a/tests/lbm/CMakeLists.txt
+++ b/tests/lbm/CMakeLists.txt
@@ -64,5 +64,9 @@ waLBerla_execute_test( NAME PdfFieldInitializerTest COMMAND $<TARGET_FILE:PdfFie
 
 
 # Code Generation
-waLBerla_compile_test( FILES codegen/SrtWithForceFieldModel.gen.py
+waLBerla_python_file_generates(codegen/SrtWithForceFieldModel.py
+        SrtWithForceFieldModel.cpp SrtWithForceFieldModel.h
+        MyNoSlip.cpp MyNoSlip.h
+        MyUBB.cpp MyUBB.h)
+waLBerla_compile_test( FILES codegen/SrtWithForceFieldModel.py
                              codegen/SrtWithForceField.cpp )
diff --git a/tests/lbm/codegen/SrtWithForceFieldModel.gen.py b/tests/lbm/codegen/SrtWithForceFieldModel.gen.py
deleted file mode 100644
index 72e10eb354141be9d2e2ec55438a22d27d8bffe5..0000000000000000000000000000000000000000
--- a/tests/lbm/codegen/SrtWithForceFieldModel.gen.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import sympy as sp
-from lbmpy.boundaries import NoSlip, UBB
-from lbmpy_walberla import generate_lattice_model_files, RefinementScaling
-from lbmpy.creationfunctions import create_lb_method
-from lbmpy_walberla.boundary import create_boundary_class
-from pystencils_walberla.cmake_integration import codegen
-import pystencils as ps
-
-# ------------- Lattice Model ------------------------------
-force_field = ps.fields("force(3): [3D]", layout='fzyx')
-
-omega = sp.Symbol("omega")
-
-scaling = RefinementScaling()
-scaling.add_standard_relaxation_rate_scaling(omega)
-scaling.add_force_scaling(force_field)
-
-generate_lattice_model_files(class_name='SrtWithForceFieldModel',
-                             method='srt', stencil='D3Q19', force_model='guo', force=force_field.center_vector,
-                             relaxation_rates=[omega], refinement_scaling=scaling)
-
-
-def genBoundary():
-    boundary = UBB([0.05, 0, 0], dim=3, name="MyUBB")
-    method = create_lb_method(stencil='D3Q19', method='srt')
-    return create_boundary_class(boundary, method)
-
-
-def genNoSlip():
-    boundary = NoSlip(name='MyNoSlip')
-    method = create_lb_method(stencil='D3Q19', method='srt')
-    return create_boundary_class(boundary, method)
-
-
-codegen.register(['MyUBB.h', 'MyUBB.cpp'], genBoundary)
-codegen.register(['MyNoSlip.h', 'MyNoSlip.cpp'], genNoSlip)
diff --git a/tests/lbm/codegen/SrtWithForceFieldModel.py b/tests/lbm/codegen/SrtWithForceFieldModel.py
new file mode 100644
index 0000000000000000000000000000000000000000..f68ec173ffeb81a4c36c70016b18fa703a4aa664
--- /dev/null
+++ b/tests/lbm/codegen/SrtWithForceFieldModel.py
@@ -0,0 +1,24 @@
+import sympy as sp
+import pystencils as ps
+from lbmpy.creationfunctions import create_lb_method
+from lbmpy.boundaries import NoSlip, UBB
+from pystencils_walberla import CodeGeneration
+from lbmpy_walberla import generate_lattice_model, RefinementScaling, generate_boundary
+
+
+with CodeGeneration() as ctx:
+    omega = sp.Symbol("omega")  # symbolic relaxation rate, shared by the method and the refinement scaling
+    force_field = ps.fields("force(3): [3D]", layout='fzyx')
+
+    # lattice Boltzmann method: D3Q19 SRT relaxing with omega, Guo force model fed from force_field
+    lb_method = create_lb_method(stencil='D3Q19', method='srt', relaxation_rates=[omega],
+                                 force_model='guo', force=force_field.center_vector)
+
+    scaling = RefinementScaling()
+    scaling.add_standard_relaxation_rate_scaling(omega)
+    scaling.add_force_scaling(force_field)
+
+    # generate components; class names match the files declared for this script in tests/lbm/CMakeLists.txt
+    generate_lattice_model(ctx, 'SrtWithForceFieldModel', lb_method, refinement_scaling=scaling)
+    generate_boundary(ctx, 'MyUBB', UBB([0.05, 0, 0]), lb_method)
+    generate_boundary(ctx, 'MyNoSlip', NoSlip(), lb_method)