From 020743a1a0387db068fb87b2660129133f4d960b Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Wed, 24 Oct 2018 09:33:33 +0200
Subject: [PATCH] Precompute x and f allocation size of GPUField

---
 src/cuda/GPUField.h      |  3 +++
 src/cuda/GPUField.impl.h | 32 +++++++++++++++++---------------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h
index bc2447072..75766957a 100755
--- a/src/cuda/GPUField.h
+++ b/src/cuda/GPUField.h
@@ -146,6 +146,9 @@ namespace cuda {
       uint_t         ySize_;
       uint_t         zSize_;
       uint_t         fSize_;
+
+      uint_t         xAllocSize_;
+      uint_t         fAllocSize_;
       Layout         layout_;
       bool           usePitchedMem_;
    };
diff --git a/src/cuda/GPUField.impl.h b/src/cuda/GPUField.impl.h
index 840ebd6a5..e5b49c7d7 100644
--- a/src/cuda/GPUField.impl.h
+++ b/src/cuda/GPUField.impl.h
@@ -58,6 +58,21 @@ GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSiz
       pitchedPtr_ = make_cudaPitchedPtr( NULL, extent.width, extent.width, extent.height );
       WALBERLA_CUDA_CHECK ( cudaMalloc( &pitchedPtr_.ptr, extent.width * extent.height * extent.depth ) );
    }
+
+   // The allocated (padded) row width is stored in the pitched pointer.
+   // The pitched pointer stores this pitch in bytes,
+   // but we keep track of the allocation sizes in number of elements.
+   WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 );
+   if ( layout_ == field::fzyx )
+   {
+      xAllocSize_ = pitchedPtr_.pitch / sizeof(T);
+      fAllocSize_ = fSize_;
+   }
+   else
+   {
+      fAllocSize_ = pitchedPtr_.pitch / sizeof(T);
+      xAllocSize_ = xSize_ + 2 * nrOfGhostLayers_;
+   }
 }
 
 
@@ -242,15 +257,7 @@ bool GPUField<T>::operator==( const GPUField & o ) const
 template<typename T>
 uint_t  GPUField<T>::xAllocSize() const
 {
-   if ( layout_ == field::fzyx )
-   {
-      // allocation size is stored in pitched pointer
-      // pitched pointer stores the amount of padded region in bytes
-      // but we have to return the size in #elements
-      WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 );
-      return pitchedPtr_.pitch / sizeof(T);
-   }
-   return xSize_ + 2 * nrOfGhostLayers_;
+   return xAllocSize_;
 }
 
 template<typename T>
@@ -268,12 +275,7 @@ uint_t  GPUField<T>::zAllocSize() const
 template<typename T>
 uint_t GPUField<T>::fAllocSize() const
 {
-   if ( layout_ == field::zyxf )
-   {
-      WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 );
-      return pitchedPtr_.pitch / sizeof(T);
-   }
-   return fSize_;
+   return fAllocSize_;
 }
 
 template<typename T>
-- 
GitLab