From db0214ca77a2f19987d0377b1a829bac48702d29 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Tue, 30 May 2017 09:54:08 +0200
Subject: [PATCH] Python export for GPUFields and interface to pycuda

---
 apps/pythonmodule/CMakeLists.txt     |  13 +-
 apps/pythonmodule/PythonModule.cpp   |  10 +
 python/waLBerla/__init__.py          |   5 +-
 python/waLBerla/cuda_extension.py    |  15 ++
 src/cuda/AddGPUFieldToStorage.impl.h |   1 +
 src/cuda/CMakeLists.txt              |   2 +-
 src/cuda/GPUField.cpp                |  15 +-
 src/cuda/GPUField.h                  |  20 +-
 src/cuda/python/Exports.h            |  43 ++++
 src/cuda/python/Exports.impl.h       | 285 +++++++++++++++++++++++++++
 src/waLBerlaDefinitions.in.h         |   2 +
 11 files changed, 401 insertions(+), 10 deletions(-)
 create mode 100644 python/waLBerla/cuda_extension.py
 create mode 100644 src/cuda/python/Exports.h
 create mode 100644 src/cuda/python/Exports.impl.h

diff --git a/apps/pythonmodule/CMakeLists.txt b/apps/pythonmodule/CMakeLists.txt
index b52d000d..b4d92d77 100644
--- a/apps/pythonmodule/CMakeLists.txt
+++ b/apps/pythonmodule/CMakeLists.txt
@@ -2,13 +2,18 @@
 
 
 if ( WALBERLA_BUILD_WITH_PYTHON_MODULE )
-    
+
+    set(PYTHON_MODULE_DEPENDENCIES blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk)
+    if (WALBERLA_BUILD_WITH_CUDA)
+        set(PYTHON_MODULE_DEPENDENCIES ${PYTHON_MODULE_DEPENDENCIES} cuda)
+    endif()
+
     if( WALBERLA_CXX_COMPILER_IS_MSVC )
-       set ( pythonModules blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk)
+       set ( pythonModules ${PYTHON_MODULE_DEPENDENCIES})
     elseif( APPLE )
-       set ( pythonModules "-Wl,-force_load" blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk)
+       set ( pythonModules "-Wl,-force_load" ${PYTHON_MODULE_DEPENDENCIES})
     else()
-       set ( pythonModules "-Wl,-whole-archive" blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk  "-Wl,-no-whole-archive" )
+       set ( pythonModules "-Wl,-whole-archive" ${PYTHON_MODULE_DEPENDENCIES}  "-Wl,-no-whole-archive" )
     endif()
 
     if( WALBERLA_BUILD_WITH_PYTHON_LBM )
diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp
index 9d6791c0..a648d08c 100644
--- a/apps/pythonmodule/PythonModule.cpp
+++ b/apps/pythonmodule/PythonModule.cpp
@@ -30,10 +30,15 @@
 #include "timeloop/python/Exports.h"
 #include "vtk/python/Exports.h"
 
+#ifdef WALBERLA_BUILD_WITH_CUDA
+#include "cuda/python/Exports.h"
+#endif
+
 #include <boost/mpl/vector.hpp>
 #include <boost/mpl/insert_range.hpp>
 
 
+
 namespace bmpl = boost::mpl;
 using namespace walberla;
 
@@ -62,6 +67,7 @@ typedef bmpl::vector<
             Field<walberla::uint32_t,1>
       > FieldTypes;
 
+typedef bmpl::vector<double, float, int, uint8_t, uint16_t>CudaFieldTypes;
 
 typedef bmpl::vector<
                       GhostLayerField<walberla::real_t,1>,
@@ -111,6 +117,10 @@ struct InitObject
       // Timeloop
       pythonManager->addExporterFunction( timeloop::exportModuleToPython );
 
+#ifdef WALBERLA_BUILD_WITH_CUDA
+      pythonManager->addExporterFunction( cuda::exportModuleToPython<CudaFieldTypes> );
+#endif
+
       python_coupling::initWalberlaForPythonModule();
    }
 };
diff --git a/python/waLBerla/__init__.py b/python/waLBerla/__init__.py
index 622f88ab..93244d0a 100644
--- a/python/waLBerla/__init__.py
+++ b/python/waLBerla/__init__.py
@@ -28,7 +28,10 @@ if cpp_available:
         # extend the C++ module with some python functions
         from .field_extension import extend as extend_field
         extend_field( field     )
-
+    if 'cuda' in globals():
+        sys.modules[__name__ + '.cuda'] = cuda
+        from .cuda_extension import extend as extend_cuda
+        extend_cuda( cuda )
     if 'geometry' in globals():
         sys.modules[__name__ + '.geometry'] = geometry
     if 'lbm' in globals():
diff --git a/python/waLBerla/cuda_extension.py b/python/waLBerla/cuda_extension.py
new file mode 100644
index 00000000..be218d11
--- /dev/null
+++ b/python/waLBerla/cuda_extension.py
@@ -0,0 +1,15 @@
+from pycuda.gpuarray import GPUArray
+import numpy as np
+
+def toGpuArray(f):
+    """Converts a waLBerla GPUField to a pycuda GPUArray"""
+    if not f:
+        return None
+    dtype = np.dtype(f.dtypeStr)
+    strides = [dtype.itemsize*a for a in f.strides]
+    return GPUArray(f.sizeWithGhostLayers, dtype, gpudata=f.ptr, strides=strides)
+
+
+def extend(cppCudaModule):
+    cppCudaModule.toGpuArray = toGpuArray
+
diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h
index f007181a..03b90c72 100644
--- a/src/cuda/AddGPUFieldToStorage.impl.h
+++ b/src/cuda/AddGPUFieldToStorage.impl.h
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include "cuda/FieldCopy.h"
 
 namespace walberla {
 namespace cuda {
diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt
index a4c149c3..83db2151 100644
--- a/src/cuda/CMakeLists.txt
+++ b/src/cuda/CMakeLists.txt
@@ -4,6 +4,6 @@
 #
 ###################################################################################################
 
-waLBerla_add_module( DEPENDS core communication domain_decomposition field stencil BUILD_ONLY_IF_FOUND CUDA ) 
+waLBerla_add_module( DEPENDS core communication domain_decomposition python_coupling field stencil BUILD_ONLY_IF_FOUND CUDA )
 
 ###################################################################################################                        
\ No newline at end of file
diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.cpp
index 8d2b51ed..fe7c3ed9 100644
--- a/src/cuda/GPUField.cpp
+++ b/src/cuda/GPUField.cpp
@@ -124,12 +124,23 @@ void GPUField<T>::getSlice(stencil::Direction d, CellInterval & ci,
    }
 }
 
+template<typename T>
+inline uint_t GPUField<T>::size( uint_t coord ) const
+{
+   switch (coord) {
+      case 0: return this->xSize();
+      case 1: return this->ySize();
+      case 2: return this->zSize();
+      case 3: return this->fSize();
+      default: WALBERLA_ASSERT(false); return 0;
+   }
+}
 
 //*******************************************************************************************************************
 /*! True if sizes of all dimensions match
  *******************************************************************************************************************/
 template<typename T>
-inline bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const
+bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const
 {
    return xSize() == other.xSize() &&
           ySize() == other.ySize() &&
@@ -140,7 +151,7 @@ inline bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const
 /*! True if allocation sizes of all dimensions match
  *******************************************************************************************************************/
 template<typename T>
-inline bool GPUField<T>::hasSameAllocSize( const GPUField<T> & other ) const
+bool GPUField<T>::hasSameAllocSize( const GPUField<T> & other ) const
 {
    return xAllocSize() == other.xAllocSize() &&
           yAllocSize() == other.yAllocSize() &&
diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h
index 3153aba6..aa059b53 100755
--- a/src/cuda/GPUField.h
+++ b/src/cuda/GPUField.h
@@ -79,11 +79,27 @@ namespace cuda {
       inline uint_t  zSize() const  { return zSize_; }
       inline uint_t  fSize() const  { return fSize_; }
       inline uint_t  size()  const  { return fSize() * xSize() * ySize() * zSize(); }
+      inline uint_t  size( uint_t coord )  const;
+
+      inline uint_t       xSizeWithGhostLayer()        const  { return xSize() + uint_c(2)*nrOfGhostLayers_; }
+      inline uint_t       ySizeWithGhostLayer()        const  { return ySize() + uint_c(2)*nrOfGhostLayers_; }
+      inline uint_t       zSizeWithGhostLayer()        const  { return zSize() + uint_c(2)*nrOfGhostLayers_; }
+      inline uint_t       sizeWithGhostLayer(uint_t i) const  { return i==3 ? fSize_ :
+                                                                              size(i) + uint_c(2)*nrOfGhostLayers_; }
 
       cell_idx_t xOff() const { return cell_idx_c( nrOfGhostLayers_ ); }
       cell_idx_t yOff() const { return cell_idx_c( nrOfGhostLayers_ ); }
       cell_idx_t zOff() const { return cell_idx_c( nrOfGhostLayers_ ); }
 
+      cell_idx_t xStride() const { return (layout_ == fzyx) ? cell_idx_t(1) :
+                                                              cell_idx_c(fAllocSize()); }
+      cell_idx_t yStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize()) :
+                                                              cell_idx_c(fAllocSize() * xAllocSize()); }
+      cell_idx_t zStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize() * yAllocSize()) :
+                                                              cell_idx_c(fAllocSize() * xAllocSize() * yAllocSize()); }
+      cell_idx_t fStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize() * yAllocSize() * zAllocSize()) :
+                                                              cell_idx_c(1); }
+
 
       uint_t xAllocSize() const;
       uint_t yAllocSize() const;
@@ -91,8 +107,8 @@ namespace cuda {
       uint_t fAllocSize() const;
       inline uint_t allocSize() const { return fAllocSize() * xAllocSize() * yAllocSize() * zAllocSize(); }
 
-      inline bool hasSameAllocSize( const GPUField<T> & other ) const;
-      inline bool hasSameSize( const GPUField<T> & other ) const;
+      bool hasSameAllocSize( const GPUField<T> & other ) const;
+      bool hasSameSize( const GPUField<T> & other ) const;
 
       GPUField<T> * cloneUninitialized() const;
 
diff --git a/src/cuda/python/Exports.h b/src/cuda/python/Exports.h
new file mode 100644
index 00000000..f4cb588a
--- /dev/null
+++ b/src/cuda/python/Exports.h
@@ -0,0 +1,43 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FieldExport.h
+//! \ingroup cuda
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+
+#pragma once
+
+#ifdef WALBERLA_BUILD_WITH_PYTHON
+
+
+#include <string>
+
+namespace walberla {
+namespace cuda {
+
+
+   template<typename DataTypes >
+   void exportModuleToPython();
+
+
+} // namespace cuda
+} // namespace walberla
+
+#include "Exports.impl.h"
+
+
+#endif //WALBERLA_BUILD_WITH_PYTHON
diff --git a/src/cuda/python/Exports.impl.h b/src/cuda/python/Exports.impl.h
new file mode 100644
index 00000000..8cd714ed
--- /dev/null
+++ b/src/cuda/python/Exports.impl.h
@@ -0,0 +1,285 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file FieldExport.cpp
+//! \ingroup cuda
+//! \author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+
+// Do not reorder includes - the include order is important
+#include "python_coupling/PythonWrapper.h"
+
+#include "core/logging/Logging.h"
+#include "cuda/GPUField.h"
+#include "cuda/communication/GPUPackInfo.h"
+#include "cuda/AddGPUFieldToStorage.h"
+
+#include "field/communication/UniformMPIDatatypeInfo.h"
+
+#include "field/AddToStorage.h"
+#include "field/python/FieldExport.h"
+
+#include "python_coupling/helper/MplHelpers.h"
+#include "python_coupling/helper/BoostPythonHelpers.h"
+
+#include <boost/type_traits/is_unsigned.hpp>
+
+#include <iostream>
+
+namespace walberla {
+namespace cuda {
+
+
+
+namespace internal {
+
+   //===================================================================================================================
+   //
+   //  Field export
+   //
+   //===================================================================================================================
+
+   template<typename GpuField_T>
+   uint64_t gpufield_ptr(const GpuField_T & gpuField)
+   {
+      return reinterpret_cast<uint64_t>(gpuField.pitchedPtr().ptr);
+   }
+
+   template<typename GpuField_T>
+   std::string gpufield_dtypeStr(const GpuField_T & )
+   {
+      return std::string(field::internal::PythonFormatString<typename GpuField_T::value_type>::get());
+   }
+
+   struct GpuFieldExporter
+   {
+      template< typename DataType>
+      void operator() ( DataType )
+      {
+         typedef GPUField<DataType> GpuField_T;
+
+         using namespace boost::python;
+
+         class_<GpuField_T, shared_ptr<GpuField_T>, boost::noncopyable>( "GpuField", no_init )
+            .add_property("layout",              &field::internal::field_layout            < GpuField_T > )
+            .add_property("size",                &field::internal::field_size              < GpuField_T > )
+            .add_property("sizeWithGhostLayers", &field::internal::field_sizeWithGhostLayer< GpuField_T > )
+            .add_property("allocSize",           &field::internal::field_allocSize         < GpuField_T > )
+            .add_property("strides",             &field::internal::field_strides           < GpuField_T > )
+            .add_property("offsets",             &field::internal::field_offsets           < GpuField_T > )
+            .add_property("ptr",                 &gpufield_ptr                             < GpuField_T > )
+            .add_property("dtypeStr",            &gpufield_dtypeStr                        < GpuField_T > )
+            .def("swapDataPointers",             &field::internal::field_swapDataPointers  < GpuField_T > )
+            .add_property("nrOfGhostLayers",     &GpuField_T::nrOfGhostLayers )
+            .def("cloneUninitialized", &GpuField_T::cloneUninitialized, return_value_policy<manage_new_object>())
+            ;
+
+
+         using field::communication::PackInfo;
+         using communication::GPUPackInfo;
+         class_< GPUPackInfo<GpuField_T>,
+                 shared_ptr< GPUPackInfo<GpuField_T> >,
+                 bases<walberla::communication::UniformPackInfo>,
+                 boost::noncopyable >( "GpuFieldPackInfo", no_init );
+
+
+         using field::communication::UniformMPIDatatypeInfo;
+         class_< UniformMPIDatatypeInfo<GpuField_T>,
+                 shared_ptr< UniformMPIDatatypeInfo<GpuField_T> >,
+                 bases<walberla::communication::UniformMPIDatatypeInfo>,
+                 boost::noncopyable >( "GpuFieldMPIDataTypeInfo", no_init );
+
+      }
+   };
+
+
+   //===================================================================================================================
+   //
+   //  createField
+   //
+   //===================================================================================================================
+
+   class CreateFieldExporter
+   {
+   public:
+      CreateFieldExporter( uint_t xs, uint_t ys, uint_t zs, uint_t fs, uint_t gl,
+                           Layout layout, const boost::python::object & type, bool usePitchedMem,
+                           const shared_ptr<boost::python::object> & resultPointer )
+         : xs_( xs ), ys_(ys), zs_(zs), fs_(fs), gl_(gl),
+           layout_( layout),  type_( type ), usePitchedMem_( usePitchedMem ) , resultPointer_( resultPointer )
+      {}
+
+      template< typename T>
+      void operator() ( T )
+      {
+         using namespace boost::python;
+
+         if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() )  )
+         {
+            *resultPointer_ = object( make_shared< GPUField<T> >( xs_,ys_,zs_, fs_,  gl_, layout_, usePitchedMem_ )  );
+         }
+      }
+
+   private:
+      uint_t xs_;
+      uint_t ys_;
+      uint_t zs_;
+      uint_t fs_;
+      uint_t gl_;
+      Layout layout_;
+      boost::python::object type_;
+      bool usePitchedMem_;
+      shared_ptr<boost::python::object> resultPointer_;
+   };
+
+   template<typename DataTypes>
+   boost::python::object createPythonGpuField( boost::python::list size,
+                                               boost::python::object type,
+                                               uint_t ghostLayers,
+                                               Layout layout,
+                                               bool usePitchedMem)
+   {
+      using namespace boost::python;
+      uint_t xSize = extract<uint_t> ( size[0] );
+      uint_t ySize = extract<uint_t> ( size[1] );
+      uint_t zSize = extract<uint_t> ( size[2] );
+      uint_t sizeLen = uint_c( len( size ) );
+      uint_t fSize = 1;
+      if ( sizeLen == 4 )
+         fSize = extract<uint_t> ( size[3] );
+
+      if ( ! PyType_Check( type.ptr() ) ) {
+         PyErr_SetString( PyExc_RuntimeError, "Invalid 'type' parameter");
+         throw error_already_set();
+      }
+
+      auto result = make_shared<boost::python::object>();
+      CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, usePitchedMem, result );
+      boost::mpl::for_each< DataTypes >  ( exporter );
+
+      if ( *result == object()  )
+      {
+         PyErr_SetString( PyExc_ValueError, "Cannot create field of this type");
+         throw error_already_set();
+      }
+      else {
+         return *result;
+      }
+   }
+
+
+   //===================================================================================================================
+   //
+   //  addToStorage
+   //
+   //===================================================================================================================
+
+   class AddToStorageExporter
+   {
+   public:
+      AddToStorageExporter( const shared_ptr<StructuredBlockStorage> & blocks,
+                           const std::string & name, uint_t fs, uint_t gl, Layout layout,
+                           const boost::python::object & type,
+                           bool usePitchedMem )
+         : blocks_( blocks ), name_( name ), fs_( fs ),
+           gl_(gl),layout_( layout),  type_( type ), usePitchedMem_(usePitchedMem), found_(false)
+      {}
+
+      template< typename T>
+      void operator() ( T )
+      {
+         if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() )  )
+         {
+            WALBERLA_ASSERT(!found_);
+            addGPUFieldToStorage<GPUField<T> >(blocks_, name_, fs_, layout_, gl_, usePitchedMem_);
+            found_ = true;
+         }
+      }
+
+      bool successful() const { return found_; }
+   private:
+      shared_ptr< StructuredBlockStorage > blocks_;
+      std::string name_;
+      uint_t fs_;
+      uint_t gl_;
+      Layout layout_;
+      boost::python::object type_;
+      bool usePitchedMem_;
+      bool found_;
+   };
+
+   template<typename DataTypes>
+   void addToStorage( const shared_ptr<StructuredBlockStorage> & blocks, const std::string & name,
+                      boost::python::object type, uint_t fs, uint_t gl, Layout layout, bool usePitchedMem )
+   {
+      using namespace boost::python;
+
+      if ( ! PyType_Check( type.ptr() ) ) {
+         PyErr_SetString( PyExc_RuntimeError, "Invalid 'type' parameter");
+         throw error_already_set();
+      }
+
+      auto result = make_shared<boost::python::object>();
+      AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, usePitchedMem );
+      boost::mpl::for_each<DataTypes>( boost::ref(exporter) );
+
+      if ( ! exporter.successful() ) {
+         PyErr_SetString( PyExc_ValueError, "Adding Field failed.");
+         throw error_already_set();
+      }
+   }
+
+
+
+} // namespace internal
+
+
+
+
+template<typename FieldTypes >
+void exportModuleToPython()
+{
+   python_coupling::ModuleScope fieldModule( "cuda" );
+
+   using namespace boost::python;
+
+   boost::mpl::for_each<FieldTypes>( internal::GpuFieldExporter() );
+
+   def( "createGpuField", &internal::createPythonGpuField<FieldTypes>, ( ( arg("size")                     ),
+                                                                         ( arg("type")                     ),
+                                                                         ( arg("ghostLayers") = uint_t(1)  ),
+                                                                         ( arg("layout")      = field::zyxf),
+                                                                         ( arg("usePitchedMem") = true     )  ) );
+
+
+   def( "addGpuFieldToStorage",  &internal::addToStorage<FieldTypes>, ( ( arg("blocks")                  ),
+                                                                        ( arg("name")                    ),
+                                                                        ( arg("type")                    ),
+                                                                        ( arg("fSize")       = 1         ),
+                                                                        ( arg("ghostLayers") = uint_t(1) ),
+                                                                        ( arg("layout")      = field::zyxf      ),
+                                                                        ( arg("usePitchedMem") = object()  ) ) );
+
+}
+
+
+
+
+
+} // namespace cuda
+} // namespace walberla
+
+
diff --git a/src/waLBerlaDefinitions.in.h b/src/waLBerlaDefinitions.in.h
index 9964b39f..dc8a5ce6 100644
--- a/src/waLBerlaDefinitions.in.h
+++ b/src/waLBerlaDefinitions.in.h
@@ -29,6 +29,8 @@
 
 #cmakedefine WALBERLA_BUILD_WITH_FFT
 
+#cmakedefine WALBERLA_BUILD_WITH_CUDA
+
 #cmakedefine WALBERLA_BUFFER_DEBUG
 
 #cmakedefine WALBERLA_THREAD_SAFE_LOGGING
-- 
GitLab