diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h index bc24470726cadb69e4d30894e369e26dacb5ceae..75766957a0199448b61cddf200261fa96d44fc3c 100755 --- a/src/cuda/GPUField.h +++ b/src/cuda/GPUField.h @@ -146,6 +146,9 @@ namespace cuda { uint_t ySize_; uint_t zSize_; uint_t fSize_; + + uint_t xAllocSize_; + uint_t fAllocSize_; Layout layout_; bool usePitchedMem_; }; diff --git a/src/cuda/GPUField.impl.h b/src/cuda/GPUField.impl.h index 840ebd6a59cb98e3ae57e75bdae4513b61039766..e5b49c7d7bf537ce9723071601886fb39537f141 100644 --- a/src/cuda/GPUField.impl.h +++ b/src/cuda/GPUField.impl.h @@ -58,6 +58,21 @@ GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSiz pitchedPtr_ = make_cudaPitchedPtr( NULL, extent.width, extent.width, extent.height ); WALBERLA_CUDA_CHECK ( cudaMalloc( &pitchedPtr_.ptr, extent.width * extent.height * extent.depth ) ); } + + // allocation size is stored in pitched pointer + // pitched pointer stores the amount of padded region in bytes + // but we keep track of the size in #elements + WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); + if ( layout_ == field::fzyx ) + { + xAllocSize_ = pitchedPtr_.pitch / sizeof(T); + fAllocSize_ = fSize_; + } + else + { + fAllocSize_ = pitchedPtr_.pitch / sizeof(T); + xAllocSize_ = xSize_ + 2 * nrOfGhostLayers_; + } } @@ -242,15 +257,7 @@ bool GPUField<T>::operator==( const GPUField & o ) const template<typename T> uint_t GPUField<T>::xAllocSize() const { - if ( layout_ == field::fzyx ) - { - // allocation size is stored in pitched pointer - // pitched pointer stores the amount of padded region in bytes - // but we have to return the size in #elements - WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); - return pitchedPtr_.pitch / sizeof(T); - } - return xSize_ + 2 * nrOfGhostLayers_; + return xAllocSize_; } template<typename T> @@ -268,12 +275,7 @@ uint_t GPUField<T>::zAllocSize() const template<typename T> uint_t GPUField<T>::fAllocSize() const { - if ( layout_ == field::zyxf ) - { - WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); - return pitchedPtr_.pitch / sizeof(T); - } - return fSize_; + return fAllocSize_; } template<typename T>