From 020743a1a0387db068fb87b2660129133f4d960b Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Wed, 24 Oct 2018 09:33:33 +0200 Subject: [PATCH] Precompute x and f allocation size of GPUField --- src/cuda/GPUField.h | 3 +++ src/cuda/GPUField.impl.h | 32 +++++++++++++++++--------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h index bc2447072..75766957a 100755 --- a/src/cuda/GPUField.h +++ b/src/cuda/GPUField.h @@ -146,6 +146,9 @@ namespace cuda { uint_t ySize_; uint_t zSize_; uint_t fSize_; + + uint_t xAllocSize_; + uint_t fAllocSize_; Layout layout_; bool usePitchedMem_; }; diff --git a/src/cuda/GPUField.impl.h b/src/cuda/GPUField.impl.h index 840ebd6a5..e5b49c7d7 100644 --- a/src/cuda/GPUField.impl.h +++ b/src/cuda/GPUField.impl.h @@ -58,6 +58,21 @@ GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSiz pitchedPtr_ = make_cudaPitchedPtr( NULL, extent.width, extent.width, extent.height ); WALBERLA_CUDA_CHECK ( cudaMalloc( &pitchedPtr_.ptr, extent.width * extent.height * extent.depth ) ); } + + // allocation size is stored in pitched pointer + // pitched pointer stores the amount of padded region in bytes + // but we keep track of the size in #elements + WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); + if ( layout_ == field::fzyx ) + { + xAllocSize_ = pitchedPtr_.pitch / sizeof(T); + fAllocSize_ = fSize_; + } + else + { + fAllocSize_ = pitchedPtr_.pitch / sizeof(T); + xAllocSize_ = xSize_ + 2 * nrOfGhostLayers_; + } } @@ -242,15 +257,7 @@ bool GPUField<T>::operator==( const GPUField & o ) const template<typename T> uint_t GPUField<T>::xAllocSize() const { - if ( layout_ == field::fzyx ) - { - // allocation size is stored in pitched pointer - // pitched pointer stores the amount of padded region in bytes - // but we have to return the size in #elements - WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); - return pitchedPtr_.pitch / sizeof(T); - } - return xSize_ + 2 * nrOfGhostLayers_; + return xAllocSize_; } template<typename T> @@ -268,12 +275,7 @@ uint_t GPUField<T>::zAllocSize() const template<typename T> uint_t GPUField<T>::fAllocSize() const { - if ( layout_ == field::zyxf ) - { - WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); - return pitchedPtr_.pitch / sizeof(T); - } - return fSize_; + return fAllocSize_; } template<typename T> -- GitLab