From b88d44d47e24114838d3480845270c7274a07c29 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Tue, 22 Jan 2019 16:43:11 +0100
Subject: [PATCH] Warning fixes in GPU communication & benchmark

---
 .../UniformGridGPU_LatticeModel.cpp           |   69 +-
 .../UniformGridGPU_LatticeModel.h             |    9 +
 .../UniformGridGPU/UniformGridGPU_LbKernel.cu |   44 +-
 .../UniformGridGPU/UniformGridGPU_NoSlip.cu   |   19 +-
 .../UniformGridGPU/UniformGridGPU_NoSlip.h    |    2 +-
 .../UniformGridGPU/UniformGridGPU_PackInfo.cu | 1158 ++++++++---------
 .../UniformGridGPU/UniformGridGPU_PackInfo.h  |    2 +-
 .../UniformGridGPU/UniformGridGPU_UBB.cu      |   19 +-
 .../UniformGridGPU/UniformGridGPU_UBB.h       |    2 +-
 .../communication/UniformGPUScheme.impl.h     |    4 +-
 10 files changed, 691 insertions(+), 637 deletions(-)

diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
index 0a288ebbb..20712a5bc 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
@@ -30,6 +30,21 @@
 
 #define FUNC_PREFIX
 
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wshadow"
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wshadow"
+#endif
+
+
 using namespace std;
 
 namespace walberla {
@@ -458,15 +473,15 @@ void UniformGridGPU_LatticeModel::Sweep::streamCollide( IBlock * block, const ui
 
     auto & omega = lm.omega;
     WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0);
+    double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
     WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
     const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
     const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -486,13 +501,13 @@ void UniformGridGPU_LatticeModel::Sweep::collide( IBlock * block, const uint_t n
 
     auto & omega = lm.omega;
     WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    double * _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
     const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
     const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -519,15 +534,15 @@ void UniformGridGPU_LatticeModel::Sweep::stream( IBlock * block, const uint_t nu
 
 
     WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0);
+    double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
     WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
-    const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2));
+    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
     const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
     const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -569,3 +584,11 @@ mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf, ::walberla::lbm::UniformGri
 
 } // namespace mpi
 } // namespace walberla
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic pop
+#endif
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h
index f48737a15..02a6c7cf8 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h
@@ -50,6 +50,11 @@
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #endif
 
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
 
 
 
@@ -734,4 +739,8 @@ struct ShearRate<UniformGridGPU_LatticeModel>
 
 #ifdef WALBERLA_CXX_COMPILER_IS_GNU
 #pragma GCC diagnostic pop
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic pop
 #endif
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
index 12b238db3..a650f8b3e 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
@@ -169,15 +169,15 @@ void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream
     }
 
     WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
-    double * const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
+    double * const _data_pdfs = pdfs->dataAt(-1, 0, 0, 0);
     WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2));
-    const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2));
-    const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2));
-    const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2);
+    double * _data_pdfs_tmp = pdfs_tmp->dataAt(-1, 0, 0, 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2));
+    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2));
+    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2));
+    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
     const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
     const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -219,12 +219,12 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
     WALBERLA_ASSERT_GREATER_EQUAL(inner.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
     WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
     double * _data_pdfs_tmp = pdfs_tmp->dataAt(inner.xMin() - 1, inner.yMin() - 1, inner.zMin() - 1, 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(inner.xSize() + 2));
-    const int64_t _size_pdfs_0 = int64_t(inner.xSize() + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(inner.ySize() + 2));
-    const int64_t _size_pdfs_1 = int64_t(inner.ySize() + 2);
-    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(inner.zSize() + 2));
-    const int64_t _size_pdfs_2 = int64_t(inner.zSize() + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(inner.xSize()) + 2));
+    const int64_t _size_pdfs_0 = int64_t(cell_idx_c(inner.xSize()) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(inner.ySize()) + 2));
+    const int64_t _size_pdfs_1 = int64_t(cell_idx_c(inner.ySize()) + 2);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(inner.zSize()) + 2));
+    const int64_t _size_pdfs_2 = int64_t(cell_idx_c(inner.zSize()) + 2);
     const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
     const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
     const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -284,20 +284,20 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream  )
         for( auto & ci: layers )
         {
             parallelSection_.run([&]( auto s ) {
+                WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
                 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
                 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
                 double * const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
+                WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
                 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
                 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
-                WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
                 double * _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
-                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 2));
-                const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 2);
-                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 2));
-                const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 2);
-                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 2));
-                const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 2);
+                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
+                const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
+                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
+                const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
+                WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
+                const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
                 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
                 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
                 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
index ca898b65c..acabe1d0c 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
@@ -40,6 +40,12 @@ namespace lbm {
 #pragma GCC diagnostic ignored "-Wconversion"
 #endif
 
+#ifdef __CUDACC__
+#pragma push
+#pragma diag_suppress = declared_but_not_referenced
+#endif
+
+
 namespace internal_boundary_UniformGridGPU_NoSlip {
 static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
 {
@@ -63,9 +69,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
       
       uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
       const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      double * _data_pdfs_1ACA00C755A3ABE3 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
-      double * _data_pdfs_10_20_m7D57D887F63BE1DF = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
-      _data_pdfs_1ACA00C755A3ABE3[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_20_m7D57D887F63BE1DF[_stride_pdfs_0*x];
+      double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
+      double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
+      _data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x];
    } 
 }
 }
@@ -74,6 +80,10 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
 #pragma GCC diagnostic pop
 #endif
 
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
 
 void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
 {
@@ -118,4 +128,5 @@ void UniformGridGPU_NoSlip::outer( IBlock * block, cudaStream_t stream  )
 
 
 } // namespace lbm
-} // namespace walberla
\ No newline at end of file
+} // namespace walberla
+
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h
index 536a99a66..fa64a1984 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h
@@ -87,7 +87,7 @@ public:
         void syncGPU()
         {
             gpuVectors_.resize( cpuVectors_.size() );
-            for(int i=0; i < NUM_TYPES; ++i )
+            for(size_t i=0; i < size_t(NUM_TYPES); ++i )
             {
                 auto & gpuVec = gpuVectors_[i];
                 auto & cpuVec = cpuVectors_[i];
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
index 604896ce2..fcd3df807 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
@@ -16,160 +16,152 @@ using walberla::stencil::Direction;
 
 
 
-namespace internal_pack_SW {
-static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_T {
+static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_S {
-static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_TN {
+static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_W {
-static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_N {
+static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
       double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_B {
-static FUNC_PREFIX void pack_B(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_TE {
+static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_T {
-static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_E {
+static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
       double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_BN {
-static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_NW {
+static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_N {
-static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_W {
+static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
       double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_TE {
-static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_BW {
+static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_E {
-static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_B {
+static FUNC_PREFIX void pack_B(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
@@ -178,14 +170,14 @@ static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs,
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
       double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
       _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
-      double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0];
    } 
 }
 }
@@ -204,44 +196,52 @@ static FUNC_PREFIX void pack_BE(double * _data_buffer, double * const _data_pdfs
 }
 }
 
-namespace internal_pack_SE {
-static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_BN {
+static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_C {
-static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
+namespace internal_pack_SW {
+static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_TN {
-static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_S {
+static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0];
    } 
 }
 }
@@ -260,232 +260,224 @@ static FUNC_PREFIX void pack_TS(double * _data_buffer, double * const _data_pdfs
 }
 }
 
-namespace internal_pack_NE {
-static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_TW {
+static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_BW {
-static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_SE {
+static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_NW {
-static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_NE {
+static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_BS {
-static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_C {
+static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
-namespace internal_pack_TW {
-static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_pack_BS {
+static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
+      double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
    } 
 }
 }
 
 
 
-namespace internal_unpack_NE {
-static FUNC_PREFIX void unpack_NE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_B {
+static FUNC_PREFIX void unpack_B(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
+      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
+      double * _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
+      _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
+      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
    } 
 }
 }
 
-namespace internal_unpack_N {
-static FUNC_PREFIX void unpack_N(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_BS {
+static FUNC_PREFIX void unpack_BS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
-      _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
+      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_E {
-static FUNC_PREFIX void unpack_E(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_S {
+static FUNC_PREFIX void unpack_S(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
+      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
+      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
+      double * _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
+      _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
+      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
       double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
-      _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
-      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
+      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
    } 
 }
 }
 
-namespace internal_unpack_T {
-static FUNC_PREFIX void unpack_T(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_BW {
+static FUNC_PREFIX void unpack_BW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
-      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
-      _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
+      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
+      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_B {
-static FUNC_PREFIX void unpack_B(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_W {
+static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
-      _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
-      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
+      _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
       double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
+      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
+      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
+      double * _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
+      _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
    } 
 }
 }
 
-namespace internal_unpack_TS {
-static FUNC_PREFIX void unpack_TS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_SE {
+static FUNC_PREFIX void unpack_SE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
+      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_S {
-static FUNC_PREFIX void unpack_S(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_E {
+static FUNC_PREFIX void unpack_E(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
+      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
+      double * _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
+      _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
+      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
       double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
-      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
-      _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
+      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
    } 
 }
 }
 
-namespace internal_unpack_BW {
-static FUNC_PREFIX void unpack_BW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_TE {
+static FUNC_PREFIX void unpack_TE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_W {
-static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_T {
+static FUNC_PREFIX void unpack_T(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
@@ -494,14 +486,14 @@ static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdf
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
       double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
       _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
-      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
-      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
-      double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
-      _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
-      double * _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
-      _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
+      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
+      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
+      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
+      double * _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
+      _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
    } 
 }
 }
@@ -520,44 +512,52 @@ static FUNC_PREFIX void unpack_TW(double * const _data_buffer, double * _data_pd
 }
 }
 
-namespace internal_unpack_NW {
-static FUNC_PREFIX void unpack_NW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_TS {
+static FUNC_PREFIX void unpack_TS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
-      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
+      _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_C {
-static FUNC_PREFIX void unpack_C(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
+namespace internal_unpack_NE {
+static FUNC_PREFIX void unpack_NE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
-      _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_BS {
-static FUNC_PREFIX void unpack_BS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_N {
+static FUNC_PREFIX void unpack_N(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
-      _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
+      _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
+      double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
+      _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
+      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];
+      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3];
+      double * _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
+      _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4];
    } 
 }
 }
@@ -576,72 +576,72 @@ static FUNC_PREFIX void unpack_BN(double * const _data_buffer, double * _data_pd
 }
 }
 
-namespace internal_unpack_SW {
-static FUNC_PREFIX void unpack_SW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_BE {
+static FUNC_PREFIX void unpack_BE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
-      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
+      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_TE {
-static FUNC_PREFIX void unpack_TE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_NW {
+static FUNC_PREFIX void unpack_NW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
-      _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
+      _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_SE {
-static FUNC_PREFIX void unpack_SE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_SW {
+static FUNC_PREFIX void unpack_SW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
-      _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
+      _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_TN {
-static FUNC_PREFIX void unpack_TN(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_C {
+static FUNC_PREFIX void unpack_C(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
-      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
+      _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
 
-namespace internal_unpack_BE {
-static FUNC_PREFIX void unpack_BE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
+namespace internal_unpack_TN {
+static FUNC_PREFIX void unpack_TN(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
 {
    if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
    {
       const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
       const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
       const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
-      double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
-      _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
+      double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
+      _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x];
    } 
 }
 }
@@ -660,226 +660,226 @@ void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, I
 
     switch( dir )
     {
-        case stencil::SW:
+        case stencil::T:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::S:
+        case stencil::TN:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::W:
+        case stencil::N:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::B:
+        case stencil::TE:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::T:
+        case stencil::E:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::BN:
+        case stencil::NW:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::N:
+        case stencil::W:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::TE:
+        case stencil::BW:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::E:
+        case stencil::B:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
         case stencil::BE:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -890,87 +890,88 @@ void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, I
             break;
         }
         
-        case stencil::SE:
+        case stencil::BN:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::C:
+        case stencil::SW:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_C::pack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2);
+            internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::TN:
+        case stencil::S:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
         case stencil::TS:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -981,118 +982,117 @@ void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, I
             break;
         }
         
-        case stencil::NE:
+        case stencil::TW:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::BW:
+        case stencil::SE:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::NW:
+        case stencil::NE:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::BS:
+        case stencil::C:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_C::pack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2);
             break;
         }
         
-        case stencil::TW:
+        case stencil::BS:
         {
             double * _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
@@ -1114,226 +1114,226 @@ void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer,
 
     switch( dir )
     {
-        case stencil::NE:
+        case stencil::B:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::N:
+        case stencil::BS:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::E:
+        case stencil::S:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::T:
+        case stencil::BW:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::B:
+        case stencil::W:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::TS:
+        case stencil::SE:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::S:
+        case stencil::E:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::BW:
+        case stencil::TE:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::W:
+        case stencil::T:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
         case stencil::TW:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -1344,87 +1344,88 @@ void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer,
             break;
         }
         
-        case stencil::NW:
+        case stencil::TS:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::C:
+        case stencil::NE:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_C::unpack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2);
+            internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::BS:
+        case stencil::N:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
         case stencil::BN:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
@@ -1435,118 +1436,117 @@ void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer,
             break;
         }
         
-        case stencil::SW:
+        case stencil::BE:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::TE:
+        case stencil::NW:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::SE:
+        case stencil::SW:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
-        case stencil::TN:
+        case stencil::C:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
-            const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_C::unpack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2);
             break;
         }
         
-        case stencil::BE:
+        case stencil::TN:
         {
             double * const _data_buffer = buffer;
+            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()));
             WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()));
-            WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()));
             double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0));
-            const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0));
-            const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0);
-            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0));
-            const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
+            const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
+            const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
+            WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
+            const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
             const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
             const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
             const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
             const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
             dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)));
             dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 )));
-            internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+            internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
             break;
         }
         
@@ -1568,39 +1568,39 @@ uint_t UniformGridGPU_PackInfo::size(stencil::Direction dir, IBlock * block)
 
     switch( dir )
     {
-        case stencil::SW:
-            elementsPerCell = 1;
+        case stencil::T:
+            elementsPerCell = 5;
             break;
         
-        case stencil::S:
-            elementsPerCell = 5;
+        case stencil::TN:
+            elementsPerCell = 1;
             break;
         
-        case stencil::W:
+        case stencil::N:
             elementsPerCell = 5;
             break;
         
-        case stencil::B:
-            elementsPerCell = 5;
+        case stencil::TE:
+            elementsPerCell = 1;
             break;
         
-        case stencil::T:
+        case stencil::E:
             elementsPerCell = 5;
             break;
         
-        case stencil::BN:
+        case stencil::NW:
             elementsPerCell = 1;
             break;
         
-        case stencil::N:
+        case stencil::W:
             elementsPerCell = 5;
             break;
         
-        case stencil::TE:
+        case stencil::BW:
             elementsPerCell = 1;
             break;
         
-        case stencil::E:
+        case stencil::B:
             elementsPerCell = 5;
             break;
         
@@ -1608,39 +1608,39 @@ uint_t UniformGridGPU_PackInfo::size(stencil::Direction dir, IBlock * block)
             elementsPerCell = 1;
             break;
         
-        case stencil::SE:
+        case stencil::BN:
             elementsPerCell = 1;
             break;
         
-        case stencil::C:
+        case stencil::SW:
             elementsPerCell = 1;
             break;
         
-        case stencil::TN:
-            elementsPerCell = 1;
+        case stencil::S:
+            elementsPerCell = 5;
             break;
         
         case stencil::TS:
             elementsPerCell = 1;
             break;
         
-        case stencil::NE:
+        case stencil::TW:
             elementsPerCell = 1;
             break;
         
-        case stencil::BW:
+        case stencil::SE:
             elementsPerCell = 1;
             break;
         
-        case stencil::NW:
+        case stencil::NE:
             elementsPerCell = 1;
             break;
         
-        case stencil::BS:
+        case stencil::C:
             elementsPerCell = 1;
             break;
         
-        case stencil::TW:
+        case stencil::BS:
             elementsPerCell = 1;
             break;
         
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h
index 4c9ab9865..c68a7b063 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h
@@ -19,7 +19,7 @@ public:
     UniformGridGPU_PackInfo( BlockDataID pdfsID_ )
         : pdfsID(pdfsID_)
     {};
-
+    virtual ~UniformGridGPU_PackInfo() {}
 
     virtual void pack  (stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
     virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
index bdab52db2..b6fcbbe3c 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
@@ -40,6 +40,12 @@ namespace lbm {
 #pragma GCC diagnostic ignored "-Wconversion"
 #endif
 
+#ifdef __CUDACC__
+#pragma push
+#pragma diag_suppress = declared_but_not_referenced
+#endif
+
+
 namespace internal_boundary_UniformGridGPU_UBB {
 static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
 {
@@ -63,9 +69,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
       
       uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
       const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
-      double * _data_pdfs_1ACA00C755A3ABE3 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
-      double * _data_pdfs_10_20_m7D57D887F63BE1DF = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
-      _data_pdfs_1ACA00C755A3ABE3[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_20_m7D57D887F63BE1DF[_stride_pdfs_0*x];
+      double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
+      double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
+      _data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x];
    } 
 }
 }
@@ -74,6 +80,10 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
 #pragma GCC diagnostic pop
 #endif
 
+#ifdef __CUDACC__
+#pragma pop
+#endif
+
 
 void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
 {
@@ -118,4 +128,5 @@ void UniformGridGPU_UBB::outer( IBlock * block, cudaStream_t stream  )
 
 
 } // namespace lbm
-} // namespace walberla
\ No newline at end of file
+} // namespace walberla
+
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h
index 3ad393854..0b0017759 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h
@@ -87,7 +87,7 @@ public:
         void syncGPU()
         {
             gpuVectors_.resize( cpuVectors_.size() );
-            for(int i=0; i < NUM_TYPES; ++i )
+            for(size_t i=0; i < size_t(NUM_TYPES); ++i )
             {
                 auto & gpuVec = gpuVectors_[i];
                 auto & cpuVec = cpuVectors_[i];
diff --git a/src/cuda/communication/UniformGPUScheme.impl.h b/src/cuda/communication/UniformGPUScheme.impl.h
index 0d7667012..51ab70d72 100644
--- a/src/cuda/communication/UniformGPUScheme.impl.h
+++ b/src/cuda/communication/UniformGPUScheme.impl.h
@@ -223,8 +223,8 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr_wrapper <StructuredBlockFo
       bufferSystemGPU_.setReceiverInfo( receiverInfo );
 
       for( auto it : receiverInfo ) {
-         bufferSystemCPU_.sendBuffer( it.first ).resize( it.second );
-         bufferSystemGPU_.sendBuffer( it.first ).resize( it.second );
+         bufferSystemCPU_.sendBuffer( it.first ).resize( size_t(it.second) );
+         bufferSystemGPU_.sendBuffer( it.first ).resize( size_t(it.second) );
       }
 
       forestModificationStamp_ = forest->getBlockForest().getModificationStamp();
-- 
GitLab