Commit 83a55d9a authored by Martin Bauer's avatar Martin Bauer
Browse files

Warning fixes in GPU communication & benchmark

parent b88d44d4
......@@ -16,146 +16,130 @@ using walberla::stencil::Direction;
namespace internal_pack_T {
static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_SW {
static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_TN {
static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_BW {
static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_N {
static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_W {
static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_TE {
static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_TW {
static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_E {
static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_NW {
static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_NW {
static FUNC_PREFIX void pack_NW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_BS {
static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_W {
static FUNC_PREFIX void pack_W(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_S {
static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_BW {
static FUNC_PREFIX void pack_BW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_TS {
static FUNC_PREFIX void pack_TS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
}
}
}
......@@ -168,310 +152,310 @@ static FUNC_PREFIX void pack_B(double * _data_buffer, double * const _data_pdfs,
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_317[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_36[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_BE {
static FUNC_PREFIX void pack_BE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_C {
static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_BN {
static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_T {
static FUNC_PREFIX void pack_T(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_35[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_SW {
static FUNC_PREFIX void pack_SW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_BN {
static FUNC_PREFIX void pack_BN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_S {
static FUNC_PREFIX void pack_S(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_N {
static FUNC_PREFIX void pack_N(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_39[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_32[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_31[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_315[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_37[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_TS {
static FUNC_PREFIX void pack_TS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_TN {
static FUNC_PREFIX void pack_TN(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_312[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_311[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_TW {
static FUNC_PREFIX void pack_TW(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_SE {
static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_313[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_SE {
static FUNC_PREFIX void pack_SE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_BE {
static FUNC_PREFIX void pack_BE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_NE {
static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_E {
static FUNC_PREFIX void pack_E(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x] = _data_pdfs_10_20_310[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2] = _data_pdfs_10_20_318[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 3] = _data_pdfs_10_20_34[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
_data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 4] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_C {
static FUNC_PREFIX void pack_C(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2)
namespace internal_pack_TE {
static FUNC_PREFIX void pack_TE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_314[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_pack_BS {
static FUNC_PREFIX void pack_BS(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_pack_NE {
static FUNC_PREFIX void pack_NE(double * _data_buffer, double * const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * const _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_316[_stride_pdfs_0*ctr_0];
double * const _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
_data_buffer[_size_pdfs_0*_size_pdfs_1*(blockDim.z*blockIdx.z + threadIdx.z) + _size_pdfs_0*(blockDim.y*blockIdx.y + threadIdx.y) + blockDim.x*blockIdx.x + threadIdx.x] = _data_pdfs_10_20_38[_stride_pdfs_0*ctr_0];
}
}
}
namespace internal_unpack_B {
static FUNC_PREFIX void unpack_B(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
namespace internal_unpack_NE {
static FUNC_PREFIX void unpack_NE(double * const _data_buffer, double * _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3)
{
if (blockDim.x*blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y*blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z*blockIdx.z + threadIdx.z < _size_pdfs_2)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y;
const int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z;
double * _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
_data_pdfs_10_20_311[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x];
double * _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
_data_pdfs_10_20_312[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 1];
double * _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
_data_pdfs_10_20_35[_stride_pdfs_0*ctr_0] = _data_buffer[_size_pdfs_0*_size_pdfs_1*(5*blockDim.z*blockIdx.z + 5*threadIdx.z) + _size_pdfs_0*(5*blockDim.y*blockIdx.y + 5*threadIdx.y) + 5*blockDim.x*blockIdx.x + 5*threadIdx.x + 2];