From b88d44d47e24114838d3480845270c7274a07c29 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Tue, 22 Jan 2019 16:43:11 +0100 Subject: [PATCH] Warning fixes in GPU communication & benchmark --- .../UniformGridGPU_LatticeModel.cpp | 69 +- .../UniformGridGPU_LatticeModel.h | 9 + .../UniformGridGPU/UniformGridGPU_LbKernel.cu | 44 +- .../UniformGridGPU/UniformGridGPU_NoSlip.cu | 19 +- .../UniformGridGPU/UniformGridGPU_NoSlip.h | 2 +- .../UniformGridGPU/UniformGridGPU_PackInfo.cu | 1158 ++++++++--------- .../UniformGridGPU/UniformGridGPU_PackInfo.h | 2 +- .../UniformGridGPU/UniformGridGPU_UBB.cu | 19 +- .../UniformGridGPU/UniformGridGPU_UBB.h | 2 +- .../communication/UniformGPUScheme.impl.h | 4 +- 10 files changed, 691 insertions(+), 637 deletions(-) diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp index 0a288ebbb..20712a5bc 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp @@ -30,6 +30,21 @@ #define FUNC_PREFIX +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wshadow" +#endif + +#ifdef WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wshadow" +#endif + + using namespace std; namespace walberla { @@ -458,15 +473,15 @@ void UniformGridGPU_LatticeModel::Sweep::streamCollide( IBlock * block, const ui auto & omega = lm.omega; WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers())); - double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0); + double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0); WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); - double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -486,13 +501,13 @@ void UniformGridGPU_LatticeModel::Sweep::collide( IBlock * block, const uint_t n auto & omega = lm.omega; WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers())); - double * _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + double * _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -519,15 +534,15 @@ void UniformGridGPU_LatticeModel::Sweep::stream( IBlock * block, const uint_t nu WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs->nrOfGhostLayers())); - double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0); + double * const _data_pdfs = pdfs->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0); WALBERLA_ASSERT_GREATER_EQUAL(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); - double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, -cell_idx_c(numberOfGhostLayersToInclude) - 1, 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); - const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + double * _data_pdfs_tmp = pdfs_tmp->dataAt(-cell_idx_c(numberOfGhostLayersToInclude) - 1, 0, 0, 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2*cell_idx_c(numberOfGhostLayersToInclude) + 2); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -569,3 +584,11 @@ mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf, ::walberla::lbm::UniformGri } // namespace mpi } // namespace walberla + +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#ifdef WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic pop +#endif \ No newline at end of file diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h index f48737a15..02a6c7cf8 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h @@ -50,6 +50,11 @@ #pragma GCC diagnostic ignored "-Wunused-parameter" #endif +#ifdef WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" +#endif @@ -734,4 +739,8 @@ struct ShearRate<UniformGridGPU_LatticeModel> #ifdef WALBERLA_CXX_COMPILER_IS_GNU #pragma GCC diagnostic pop +#endif + +#ifdef WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic pop #endif \ No newline at end of file diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu index 12b238db3..a650f8b3e 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu @@ -169,15 +169,15 @@ void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream } WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers())); - double * const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0); + double * const _data_pdfs = pdfs->dataAt(-1, 0, 0, 0); WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers())); - double * _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(pdfs->xSize() + 2)); - const int64_t _size_pdfs_0 = int64_t(pdfs->xSize() + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(pdfs->ySize() + 2)); - const int64_t _size_pdfs_1 = int64_t(pdfs->ySize() + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(pdfs->zSize() + 2)); - const int64_t _size_pdfs_2 = int64_t(pdfs->zSize() + 2); + double * _data_pdfs_tmp = pdfs_tmp->dataAt(-1, 0, 0, 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->xSize()) + 2)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(pdfs->xSize()) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->ySize()) + 2)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(pdfs->ySize()) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(pdfs->zSize()) + 2)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(pdfs->zSize()) + 2); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -219,12 +219,12 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream ) WALBERLA_ASSERT_GREATER_EQUAL(inner.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); double * _data_pdfs_tmp = pdfs_tmp->dataAt(inner.xMin() - 1, inner.yMin() - 1, inner.zMin() - 1, 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(inner.xSize() + 2)); - const int64_t _size_pdfs_0 = int64_t(inner.xSize() + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(inner.ySize() + 2)); - const int64_t _size_pdfs_1 = int64_t(inner.ySize() + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(inner.zSize() + 2)); - const int64_t _size_pdfs_2 = int64_t(inner.zSize() + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(inner.xSize()) + 2)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(inner.xSize()) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(inner.ySize()) + 2)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(inner.ySize()) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(inner.zSize()) + 2)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(inner.zSize()) + 2); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -284,20 +284,20 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream ) for( auto & ci: layers ) { parallelSection_.run([&]( auto s ) { + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0); + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers())); double * _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 2)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 2)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 2); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 2)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 2); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 2); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu index ca898b65c..acabe1d0c 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu @@ -40,6 +40,12 @@ namespace lbm { #pragma GCC diagnostic ignored "-Wconversion" #endif +#ifdef __CUDACC__ +#pragma push +#pragma diag_suppress = declared_but_not_referenced +#endif + + namespace internal_boundary_UniformGridGPU_NoSlip { static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize) { @@ -63,9 +69,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind uint8_t * const _data_indexVector_112 = _data_indexVector + 12; const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x])); - double * _data_pdfs_1ACA00C755A3ABE3 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir]; - double * _data_pdfs_10_20_m7D57D887F63BE1DF = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir; - _data_pdfs_1ACA00C755A3ABE3[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_20_m7D57D887F63BE1DF[_stride_pdfs_0*x]; + double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir]; + double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir; + _data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x]; } } } @@ -74,6 +80,10 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind #pragma GCC diagnostic pop #endif +#ifdef __CUDACC__ +#pragma pop +#endif + void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream ) { @@ -118,4 +128,5 @@ void UniformGridGPU_NoSlip::outer( IBlock * block, cudaStream_t stream ) } // namespace lbm -} // namespace walberla \ No newline at end of file +} // namespace walberla + diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h index 536a99a66..fa64a1984 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h @@ -87,7 +87,7 @@ public: void syncGPU() { gpuVectors_.resize( cpuVectors_.size() ); - for(int i=0; i < NUM_TYPES; ++i ) + for(size_t i=0; i < size_t(NUM_TYPES); ++i ) { auto & gpuVec = gpuVectors_[i]; auto & cpuVec = cpuVectors_[i]; diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu index 604896ce2..fcd3df807 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu @@ -16,160 +16,152 @@ using walberla::stencil::Direction; -namespace internal_pack_SW { -static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_T { +static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_S { -static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_TN { +static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_W { -static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_N { +static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0]; double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_B { -static FUNC_PREFIX void pack_B(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_TE { +static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_T { -static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_E { +static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0]; double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0]; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_BN { -static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_NW { +static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_N { -static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_W { +static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0]; double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0]; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_TE { -static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_BW { +static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_E { -static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_B { +static FUNC_PREFIX void pack_B(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { @@ -178,14 +170,14 @@ static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0]; - double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0]; } } } @@ -204,44 +196,52 @@ static FUNC_PREFIX void pack_BE(double * _data_buffer, double * const _data_pdfs } } -namespace internal_pack_SE { -static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_BN { +static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_C { -static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2) +namespace internal_pack_SW { +static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_TN { -static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_S { +static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0]; } } } @@ -260,232 +260,224 @@ static FUNC_PREFIX void pack_TS(double * _data_buffer, double * const _data_pdfs } } -namespace internal_pack_NE { -static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_TW { +static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_BW { -static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_SE { +static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_NW { -static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_NE { +static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_BS { -static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_C { +static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0]; } } } -namespace internal_pack_TW { -static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_pack_BS { +static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0]; + double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; + _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0]; } } } -namespace internal_unpack_NE { -static FUNC_PREFIX void unpack_NE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_B { +static FUNC_PREFIX void unpack_B(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; + _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; + double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; + _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; + double * _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; + _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; + _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; + double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; + _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; } } } -namespace internal_unpack_N { -static FUNC_PREFIX void unpack_N(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_BS { +static FUNC_PREFIX void unpack_BS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; - double * _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; - double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; - double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; - double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; + double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; + _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_E { -static FUNC_PREFIX void unpack_E(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_S { +static FUNC_PREFIX void unpack_S(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; + double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; + _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; + double * _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; + _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; + double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; + _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; + _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; - double * _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; - double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; - double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; + _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; } } } -namespace internal_unpack_T { -static FUNC_PREFIX void unpack_T(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_BW { +static FUNC_PREFIX void unpack_BW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; - double * _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; - double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; - double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; - double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; + double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; + _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_B { -static FUNC_PREFIX void unpack_B(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_W { +static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; - double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; - double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; + _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; - double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; + _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; + double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; + _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; + _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; + double * _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; + _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; } } } -namespace internal_unpack_TS { -static FUNC_PREFIX void unpack_TS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_SE { +static FUNC_PREFIX void unpack_SE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; + _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_S { -static FUNC_PREFIX void unpack_S(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_E { +static FUNC_PREFIX void unpack_E(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; + double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; + _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; + double * _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; + double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; + _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; + _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; - double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; - double * _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; - double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; + _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; } } } -namespace internal_unpack_BW { -static FUNC_PREFIX void unpack_BW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_TE { +static FUNC_PREFIX void unpack_TE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; + _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_W { -static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_T { +static FUNC_PREFIX void unpack_T(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { @@ -494,14 +486,14 @@ static FUNC_PREFIX void unpack_W(double * const _data_buffer, double * _data_pdf const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; double * _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; - double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; - double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; - double * _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; - double * _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; + double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; + _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; + double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; + _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; + _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; + double * _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; + _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; } } } @@ -520,44 +512,52 @@ static FUNC_PREFIX void unpack_TW(double * const _data_buffer, double * _data_pd } } -namespace internal_unpack_NW { -static FUNC_PREFIX void unpack_NW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_TS { +static FUNC_PREFIX void unpack_TS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; + _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_C { -static FUNC_PREFIX void unpack_C(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2) +namespace internal_unpack_NE { +static FUNC_PREFIX void unpack_NE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2; - _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; + _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_BS { -static FUNC_PREFIX void unpack_BS(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_N { +static FUNC_PREFIX void unpack_N(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; + _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x]; + double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; + _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1]; + double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; + _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2]; + double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; + _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3]; + double * _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; + _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4]; } } } @@ -576,72 +576,72 @@ static FUNC_PREFIX void unpack_BN(double * const _data_buffer, double * _data_pd } } -namespace internal_unpack_SW { -static FUNC_PREFIX void unpack_SW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_BE { +static FUNC_PREFIX void unpack_BE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; + _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_TE { -static FUNC_PREFIX void unpack_TE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_NW { +static FUNC_PREFIX void unpack_NW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; + _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_SE { -static FUNC_PREFIX void unpack_SE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_SW { +static FUNC_PREFIX void unpack_SW(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; + _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_TN { -static FUNC_PREFIX void unpack_TN(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_C { +static FUNC_PREFIX void unpack_C(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2; + _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } -namespace internal_unpack_BE { -static FUNC_PREFIX void unpack_BE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) +namespace internal_unpack_TN { +static FUNC_PREFIX void unpack_TN(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2) { const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x; const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y; const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z; - double * _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; + double * _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; + _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x]; } } } @@ -660,226 +660,226 @@ void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, I switch( dir ) { - case stencil::SW: + case stencil::T: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::S: + case stencil::TN: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::W: + case stencil::N: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::B: + case stencil::TE: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::T: + case stencil::E: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::BN: + case stencil::NW: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::N: + case stencil::W: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::TE: + case stencil::BW: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::E: + case stencil::B: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } case stencil::BE: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -890,87 +890,88 @@ void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, I break; } - case stencil::SE: + case stencil::BN: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::C: + case stencil::SW: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_C::pack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2); + internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::TN: + case stencil::S: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } case stencil::TS: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -981,118 +982,117 @@ void UniformGridGPU_PackInfo::pack(Direction dir, unsigned char * byte_buffer, I break; } - case stencil::NE: + case stencil::TW: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::BW: + case stencil::SE: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::NW: + case stencil::NE: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::BS: + case stencil::C: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); - const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_C::pack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2); break; } - case stencil::TW: + case stencil::BS: { double * _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } @@ -1114,226 +1114,226 @@ void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer, switch( dir ) { - case stencil::NE: + case stencil::B: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::N: + case stencil::BS: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::E: + case stencil::S: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::T: + case stencil::BW: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::B: + case stencil::W: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::TS: + case stencil::SE: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::S: + case stencil::E: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::BW: + case stencil::TE: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::W: + case stencil::T: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } case stencil::TW: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -1344,87 +1344,88 @@ void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer, break; } - case stencil::NW: + case stencil::TS: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::C: + case stencil::NE: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_C::unpack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2); + internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::BS: + case stencil::N: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } case stencil::BN: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); @@ -1435,118 +1436,117 @@ void UniformGridGPU_PackInfo::unpack(Direction dir, unsigned char * byte_buffer, break; } - case stencil::SW: + case stencil::BE: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::TE: + case stencil::NW: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::SE: + case stencil::SW: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } - case stencil::TN: + case stencil::C: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); - const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_C::unpack_C<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2); break; } - case stencil::BE: + case stencil::TN: { double * const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())); WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())); double * _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 0)); - const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(ci.ySize() + 0)); - const int64_t _size_pdfs_1 = int64_t(ci.ySize() + 0); - WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(ci.zSize() + 0)); - const int64_t _size_pdfs_2 = int64_t(ci.zSize() + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); + const int64_t _size_pdfs_0 = int64_t(cell_idx_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); + const int64_t _size_pdfs_1 = int64_t(cell_idx_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); + const int64_t _size_pdfs_2 = int64_t(cell_idx_c(ci.zSize()) + 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride()); dim3 _block(int(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)), int(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)), int(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2))); dim3 _grid(int(( (_size_pdfs_0) % (((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) : ( (int64_t)(_size_pdfs_0) / (int64_t)(((16 < _size_pdfs_0) ? 16 : _size_pdfs_0)) ) +1 )), int(( (_size_pdfs_1) % (((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) : ( (int64_t)(_size_pdfs_1) / (int64_t)(((16 < _size_pdfs_1) ? 16 : _size_pdfs_1)) ) +1 )), int(( (_size_pdfs_2) % (((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) : ( (int64_t)(_size_pdfs_2) / (int64_t)(((1 < _size_pdfs_2) ? 1 : _size_pdfs_2)) ) +1 ))); - internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); break; } @@ -1568,39 +1568,39 @@ uint_t UniformGridGPU_PackInfo::size(stencil::Direction dir, IBlock * block) switch( dir ) { - case stencil::SW: - elementsPerCell = 1; + case stencil::T: + elementsPerCell = 5; break; - case stencil::S: - elementsPerCell = 5; + case stencil::TN: + elementsPerCell = 1; break; - case stencil::W: + case stencil::N: elementsPerCell = 5; break; - case stencil::B: - elementsPerCell = 5; + case stencil::TE: + elementsPerCell = 1; break; - case stencil::T: + case stencil::E: elementsPerCell = 5; break; - case stencil::BN: + case stencil::NW: elementsPerCell = 1; break; - case stencil::N: + case stencil::W: elementsPerCell = 5; break; - case stencil::TE: + case stencil::BW: elementsPerCell = 1; break; - case stencil::E: + case stencil::B: elementsPerCell = 5; break; @@ -1608,39 +1608,39 @@ uint_t UniformGridGPU_PackInfo::size(stencil::Direction dir, IBlock * block) elementsPerCell = 1; break; - case stencil::SE: + case stencil::BN: elementsPerCell = 1; break; - case stencil::C: + case stencil::SW: elementsPerCell = 1; break; - case stencil::TN: - elementsPerCell = 1; + case stencil::S: + elementsPerCell = 5; break; case stencil::TS: elementsPerCell = 1; break; - case stencil::NE: + case stencil::TW: elementsPerCell = 1; break; - case stencil::BW: + case stencil::SE: elementsPerCell = 1; break; - case stencil::NW: + case stencil::NE: elementsPerCell = 1; break; - case stencil::BS: + case stencil::C: elementsPerCell = 1; break; - case stencil::TW: + case stencil::BS: elementsPerCell = 1; break; diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h index 4c9ab9865..c68a7b063 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h @@ -19,7 +19,7 @@ public: UniformGridGPU_PackInfo( BlockDataID pdfsID_ ) : pdfsID(pdfsID_) {}; - + virtual ~UniformGridGPU_PackInfo() {} virtual void pack (stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream); virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream); diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu index bdab52db2..b6fcbbe3c 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu @@ -40,6 +40,12 @@ namespace lbm { #pragma GCC diagnostic ignored "-Wconversion" #endif +#ifdef __CUDACC__ +#pragma push +#pragma diag_suppress = declared_but_not_referenced +#endif + + namespace internal_boundary_UniformGridGPU_UBB { static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize) { @@ -63,9 +69,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV uint8_t * const _data_indexVector_112 = _data_indexVector + 12; const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x])); - double * _data_pdfs_1ACA00C755A3ABE3 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir]; - double * _data_pdfs_10_20_m7D57D887F63BE1DF = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir; - _data_pdfs_1ACA00C755A3ABE3[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_20_m7D57D887F63BE1DF[_stride_pdfs_0*x]; + double * _data_pdfsf9cc34cc4e2b6261 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir]; + double * _data_pdfs_10_2011ac6bf6446d4afa = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir; + _data_pdfsf9cc34cc4e2b6261[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_2011ac6bf6446d4afa[_stride_pdfs_0*x]; } } } @@ -74,6 +80,10 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV #pragma GCC diagnostic pop #endif +#ifdef __CUDACC__ +#pragma pop +#endif + void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream ) { @@ -118,4 +128,5 @@ void UniformGridGPU_UBB::outer( IBlock * block, cudaStream_t stream ) } // namespace lbm -} // namespace walberla \ No newline at end of file +} // namespace walberla + diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h index 3ad393854..0b0017759 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h @@ -87,7 +87,7 @@ public: void syncGPU() { gpuVectors_.resize( cpuVectors_.size() ); - for(int i=0; i < NUM_TYPES; ++i ) + for(size_t i=0; i < size_t(NUM_TYPES); ++i ) { auto & gpuVec = gpuVectors_[i]; auto & cpuVec = cpuVectors_[i]; diff --git a/src/cuda/communication/UniformGPUScheme.impl.h b/src/cuda/communication/UniformGPUScheme.impl.h index 0d7667012..51ab70d72 100644 --- a/src/cuda/communication/UniformGPUScheme.impl.h +++ b/src/cuda/communication/UniformGPUScheme.impl.h @@ -223,8 +223,8 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr_wrapper <StructuredBlockFo bufferSystemGPU_.setReceiverInfo( receiverInfo ); for( auto it : receiverInfo ) { - bufferSystemCPU_.sendBuffer( it.first ).resize( it.second ); - bufferSystemGPU_.sendBuffer( it.first ).resize( it.second ); + bufferSystemCPU_.sendBuffer( it.first ).resize( size_t(it.second) ); + bufferSystemGPU_.sendBuffer( it.first ).resize( size_t(it.second) ); } forestModificationStamp_ = forest->getBlockForest().getModificationStamp(); -- GitLab