From 5f8068294d4acae46259644e9118b6369b349f6b Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Tue, 30 May 2017 13:39:01 +0200
Subject: [PATCH] CUDA: removed explicit instantiation of GPUFields

- easier Python export
---
 apps/pythonmodule/PythonModule.cpp            |   7 +-
 src/cuda/FieldIndexing.h                      |   1 +
 ...FieldIndexing.cpp => FieldIndexing.impl.h} |   5 -
 src/cuda/FieldIndexing3D.h                    |   1 +
 ...dIndexing3D.cpp => FieldIndexing3D.impl.h} |   5 -
 src/cuda/FieldIndexingXYZ.h                   |   1 +
 ...ndexingXYZ.cpp => FieldIndexingXYZ.impl.h} |   3 -
 src/cuda/GPUField.h                           |   1 +
 src/cuda/{GPUField.cpp => GPUField.impl.h}    |   4 -
 src/cuda/GPUTypesExplicitInstantiation.h      |   8 --
 src/cuda/python/Exports.h                     |   2 +-
 src/cuda/python/Exports.impl.h                | 109 +++++++++++++++---
 12 files changed, 102 insertions(+), 45 deletions(-)
 rename src/cuda/{FieldIndexing.cpp => FieldIndexing.impl.h} (98%)
 rename src/cuda/{FieldIndexing3D.cpp => FieldIndexing3D.impl.h} (98%)
 rename src/cuda/{FieldIndexingXYZ.cpp => FieldIndexingXYZ.impl.h} (97%)
 rename src/cuda/{GPUField.cpp => GPUField.impl.h} (99%)
 delete mode 100644 src/cuda/GPUTypesExplicitInstantiation.h

diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp
index a648d08c..82fab2e0 100644
--- a/apps/pythonmodule/PythonModule.cpp
+++ b/apps/pythonmodule/PythonModule.cpp
@@ -67,7 +67,6 @@ typedef bmpl::vector<
             Field<walberla::uint32_t,1>
       > FieldTypes;
 
-typedef bmpl::vector<double, float, int, uint8_t, uint16_t>CudaFieldTypes;
 
 typedef bmpl::vector<
                       GhostLayerField<walberla::real_t,1>,
@@ -118,7 +117,11 @@ struct InitObject
       pythonManager->addExporterFunction( timeloop::exportModuleToPython );
 
 #ifdef WALBERLA_BUILD_WITH_CUDA
-      pythonManager->addExporterFunction( cuda::exportModuleToPython<CudaFieldTypes> );
+      using walberla::cuda::GPUField;
+      typedef bmpl::vector<GPUField<double>, GPUField<float>, GPUField<int>, GPUField<uint8_t>, GPUField<uint16_t> > GPUFields;
+
+      pythonManager->addExporterFunction( cuda::exportModuleToPython<GPUFields> );
+      pythonManager->addBlockDataConversion<GPUFields>();
 #endif
 
       python_coupling::initWalberlaForPythonModule();
diff --git a/src/cuda/FieldIndexing.h b/src/cuda/FieldIndexing.h
index 653a5de2..7ed089f4 100644
--- a/src/cuda/FieldIndexing.h
+++ b/src/cuda/FieldIndexing.h
@@ -91,4 +91,5 @@ namespace cuda {
 } // namespace cuda
 } // namespace walberla
 
+#include "FieldIndexing.impl.h"
 
diff --git a/src/cuda/FieldIndexing.cpp b/src/cuda/FieldIndexing.impl.h
similarity index 98%
rename from src/cuda/FieldIndexing.cpp
rename to src/cuda/FieldIndexing.impl.h
index 413bbe1a..c4837d3c 100644
--- a/src/cuda/FieldIndexing.cpp
+++ b/src/cuda/FieldIndexing.impl.h
@@ -20,7 +20,6 @@
 //======================================================================================================================
 
 #include "FieldIndexing.h"
-#include "GPUTypesExplicitInstantiation.h"
 #include "GPUField.h"
 
 #include "core/cell/CellInterval.h"
@@ -224,10 +223,6 @@ FieldIndexing<T> FieldIndexing<T>::all ( const GPUField<T> & f, const cell::Cell
 
 
 
-
-GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing )
-
-
 } // namespace cuda
 } // namespace walberla
 
diff --git a/src/cuda/FieldIndexing3D.h b/src/cuda/FieldIndexing3D.h
index 0dbe9756..c6637ec6 100644
--- a/src/cuda/FieldIndexing3D.h
+++ b/src/cuda/FieldIndexing3D.h
@@ -103,3 +103,4 @@ namespace cuda {
 } // namespace walberla
 
 
+#include "FieldIndexing3D.impl.h" // member definitions of FieldIndexing3D<T> (formerly FieldIndexing3D.cpp)
\ No newline at end of file
diff --git a/src/cuda/FieldIndexing3D.cpp b/src/cuda/FieldIndexing3D.impl.h
similarity index 98%
rename from src/cuda/FieldIndexing3D.cpp
rename to src/cuda/FieldIndexing3D.impl.h
index 5a797a23..896f7e1d 100644
--- a/src/cuda/FieldIndexing3D.cpp
+++ b/src/cuda/FieldIndexing3D.impl.h
@@ -20,7 +20,6 @@
 //======================================================================================================================
 
 #include "FieldIndexing3D.h"
-#include "GPUTypesExplicitInstantiation.h"
 #include "GPUField.h"
 
 #include "core/cell/CellInterval.h"
@@ -165,10 +164,6 @@ FieldIndexing3D<T> FieldIndexing3D<T>::intervalXYZ( const GPUField<T> & f, const
 
 
 
-
-GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing3D )
-
-
 } // namespace cuda
 } // namespace walberla
 
diff --git a/src/cuda/FieldIndexingXYZ.h b/src/cuda/FieldIndexingXYZ.h
index 2c25975e..18a6e264 100644
--- a/src/cuda/FieldIndexingXYZ.h
+++ b/src/cuda/FieldIndexingXYZ.h
@@ -77,3 +77,4 @@ template< typename T> class GPUField;
 } // namespace walberla
 
 
+#include "FieldIndexingXYZ.impl.h"
\ No newline at end of file
diff --git a/src/cuda/FieldIndexingXYZ.cpp b/src/cuda/FieldIndexingXYZ.impl.h
similarity index 97%
rename from src/cuda/FieldIndexingXYZ.cpp
rename to src/cuda/FieldIndexingXYZ.impl.h
index 8cc0bd63..c8ec561f 100644
--- a/src/cuda/FieldIndexingXYZ.cpp
+++ b/src/cuda/FieldIndexingXYZ.impl.h
@@ -20,7 +20,6 @@
 //======================================================================================================================
 
 #include "FieldIndexingXYZ.h"
-#include "GPUTypesExplicitInstantiation.h"
 #include "GPUField.h"
 
 #include "core/cell/CellInterval.h"
@@ -114,8 +113,6 @@ FieldIndexingXYZ<T> FieldIndexingXYZ<T>::withGhostLayerXYZ( const GPUField<T> &
 }
 
 
-GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexingXYZ )
-
 
 } // namespace cuda
 } // namespace walberla
diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h
index aa059b53..437fe1c9 100755
--- a/src/cuda/GPUField.h
+++ b/src/cuda/GPUField.h
@@ -149,3 +149,4 @@ namespace cuda {
 } // namespace walberla
 
 
+#include "GPUField.impl.h"
\ No newline at end of file
diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.impl.h
similarity index 99%
rename from src/cuda/GPUField.cpp
rename to src/cuda/GPUField.impl.h
index fe7c3ed9..b6fe3f8a 100644
--- a/src/cuda/GPUField.cpp
+++ b/src/cuda/GPUField.impl.h
@@ -21,7 +21,6 @@
 
 #include "GPUField.h"
 #include "ErrorChecking.h"
-#include "GPUTypesExplicitInstantiation.h"
 
 #include "core/logging/Logging.h"
 
@@ -249,9 +248,6 @@ void GPUField<T>::swapDataPointers( GPUField<T> & other )
 
 
 
-GPU_CLASS_TEMPLATE_INSTANTIATION( GPUField )
-
-
 } // namespace cuda
 } // namespace walberla
 
diff --git a/src/cuda/GPUTypesExplicitInstantiation.h b/src/cuda/GPUTypesExplicitInstantiation.h
deleted file mode 100644
index bdc4b584..00000000
--- a/src/cuda/GPUTypesExplicitInstantiation.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#define GPU_CLASS_TEMPLATE_INSTANTIATION(ClassName)\
-   template class ClassName< double   >;\
-   template class ClassName< float    >;\
-   template class ClassName< int      >;\
-   template class ClassName< uint8_t  >;\
-   template class ClassName< uint16_t >;
-
-
diff --git a/src/cuda/python/Exports.h b/src/cuda/python/Exports.h
index f4cb588a..a2990cfc 100644
--- a/src/cuda/python/Exports.h
+++ b/src/cuda/python/Exports.h
@@ -30,7 +30,7 @@ namespace walberla {
 namespace cuda {
 
 
-   template<typename DataTypes >
+   template<typename GpuFields >
    void exportModuleToPython();
 
 
diff --git a/src/cuda/python/Exports.impl.h b/src/cuda/python/Exports.impl.h
index 8cd714ed..1d2e2dc2 100644
--- a/src/cuda/python/Exports.impl.h
+++ b/src/cuda/python/Exports.impl.h
@@ -38,6 +38,7 @@
 #include <boost/type_traits/is_unsigned.hpp>
 
 #include <iostream>
+#include <cuda/communication/GPUPackInfo.h>
 
 namespace walberla {
 namespace cuda {
@@ -66,11 +67,9 @@ namespace internal {
 
    struct GpuFieldExporter
    {
-      template< typename DataType>
-      void operator() ( DataType )
+      template< typename GpuField_T>
+      void operator() ( python_coupling::NonCopyableWrap<GpuField_T> )
       {
-         typedef GPUField<DataType> GpuField_T;
-
          using namespace boost::python;
 
          class_<GpuField_T, shared_ptr<GpuField_T>, boost::noncopyable>( "GpuField", no_init )
@@ -122,11 +121,11 @@ namespace internal {
            layout_( layout),  type_( type ), usePitchedMem_( usePitchedMem ) , resultPointer_( resultPointer )
       {}
 
-      template< typename T>
-      void operator() ( T )
+      template< typename GpuField_T>
+      void operator() ( python_coupling::NonCopyableWrap<GpuField_T> )
       {
          using namespace boost::python;
-
+         typedef typename GpuField_T::value_type T;
          if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() )  )
          {
             *resultPointer_ = object( make_shared< GPUField<T> >( xs_,ys_,zs_, fs_,  gl_, layout_, usePitchedMem_ )  );
@@ -145,7 +144,7 @@ namespace internal {
       shared_ptr<boost::python::object> resultPointer_;
    };
 
-   template<typename DataTypes>
+   template<typename GpuFields>
    boost::python::object createPythonGpuField( boost::python::list size,
                                                boost::python::object type,
                                                uint_t ghostLayers,
@@ -168,7 +167,7 @@ namespace internal {
 
       auto result = make_shared<boost::python::object>();
       CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, usePitchedMem, result );
-      boost::mpl::for_each< DataTypes >  ( exporter );
+      python_coupling::for_each_noncopyable_type< GpuFields >( exporter );
 
       if ( *result == object()  )
       {
@@ -198,9 +197,10 @@ namespace internal {
            gl_(gl),layout_( layout),  type_( type ), usePitchedMem_(usePitchedMem), found_(false)
       {}
 
-      template< typename T>
-      void operator() ( T )
+      template< typename GpuField_T>
+      void operator() ( python_coupling::NonCopyableWrap<GpuField_T> )
       {
+         typedef typename GpuField_T::value_type T;
          if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() )  )
          {
             WALBERLA_ASSERT(!found_);
@@ -221,7 +221,7 @@ namespace internal {
       bool found_;
    };
 
-   template<typename DataTypes>
+   template<typename GpuFields>
    void addToStorage( const shared_ptr<StructuredBlockStorage> & blocks, const std::string & name,
                       boost::python::object type, uint_t fs, uint_t gl, Layout layout, bool usePitchedMem )
    {
@@ -234,7 +234,7 @@ namespace internal {
 
       auto result = make_shared<boost::python::object>();
       AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, usePitchedMem );
-      boost::mpl::for_each<DataTypes>( boost::ref(exporter) );
+      python_coupling::for_each_noncopyable_type<GpuFields>( boost::ref(exporter) );
 
       if ( ! exporter.successful() ) {
          PyErr_SetString( PyExc_ValueError, "Adding Field failed.");
@@ -243,29 +243,101 @@ namespace internal {
    }
 
 
+   //===================================================================================================================
+   //
+   //  createPackInfo Export
+   //
+   //===================================================================================================================
+
+   template< typename GPUField_T > // builds a GPUPackInfo<GPUField_T> for bdId and wraps it in a boost::python::object
+   boost::python::object createGPUPackInfoToObject( BlockDataID bdId, uint_t numberOfGhostLayers )
+   {
+      using cuda::communication::GPUPackInfo;
+      if ( numberOfGhostLayers > 0  ) // caller requested an explicit number of ghost layers
+         return boost::python::object( make_shared< GPUPackInfo<GPUField_T> >( bdId, numberOfGhostLayers ) );
+      else // numberOfGhostLayers == 0: use the single-argument GPUPackInfo constructor
+         return boost::python::object( make_shared< GPUPackInfo<GPUField_T> >( bdId ) );
+   }
+
+   FunctionExporterClass( createGPUPackInfoToObject, boost::python::object( BlockDataID, uint_t  ) ); // NOTE(review): presumably generates Exporter_createGPUPackInfoToObject used by createPackInfo - confirm
+
+   template< typename GpuFields> // Python entry point: returns a GPU pack info object for the block data registered under blockDataName
+   boost::python::object createPackInfo( const shared_ptr<StructuredBlockStorage> & bs,
+                                         const std::string & blockDataName, uint_t numberOfGhostLayers )
+   {
+      using cuda::communication::GPUPackInfo;
+
+      auto bdId = python_coupling::blockDataIDFromString( *bs, blockDataName );
+      if ( bs->begin() == bs->end() ) {
+         // bs iterates over no blocks, so there is no block to dispatch the field type on: return an arbitrary (GPUField<real_t>) PackInfo
+         return createGPUPackInfoToObject< GPUField<real_t> > ( bdId, numberOfGhostLayers );
+      }
+
+      IBlock * firstBlock =  & ( * bs->begin() ); // only the first block is handed to the dispatcher
+      python_coupling::Dispatcher<GpuFields, Exporter_createGPUPackInfoToObject > dispatcher( firstBlock ); // NOTE(review): presumably picks the GpuFields entry matching the data stored under bdId - confirm
+      return dispatcher( bdId )( bdId, numberOfGhostLayers ) ;
+   }
+
+
+   //===================================================================================================================
+   //
+   //  createMPIDatatypeInfo
+   //
+   //===================================================================================================================
+
+
+   template< typename GpuField_T > // builds a UniformMPIDatatypeInfo<GpuField_T> for bdId and wraps it in a boost::python::object
+   boost::python::object createMPIDatatypeInfoToObject( BlockDataID bdId, uint_t numberOfGhostLayers )
+   {
+      using field::communication::UniformMPIDatatypeInfo;
+      if ( numberOfGhostLayers > 0 ) // caller requested an explicit number of ghost layers
+         return boost::python::object( make_shared< UniformMPIDatatypeInfo<GpuField_T> >( bdId, numberOfGhostLayers ) );
+      else // numberOfGhostLayers == 0: use the single-argument UniformMPIDatatypeInfo constructor
+         return boost::python::object( make_shared< UniformMPIDatatypeInfo<GpuField_T> >( bdId ) );
+   }
+
+   FunctionExporterClass( createMPIDatatypeInfoToObject, boost::python::object( BlockDataID, uint_t  ) ); // NOTE(review): presumably generates Exporter_createMPIDatatypeInfoToObject used by createMPIDatatypeInfo - confirm
+
+   template< typename GpuFields> // Python entry point: returns an MPI datatype info object for the block data registered under blockDataName
+   boost::python::object createMPIDatatypeInfo( const shared_ptr<StructuredBlockStorage> & bs,
+                                                const std::string & blockDataName,
+                                                uint_t numberOfGhostLayers)
+   {
+      auto bdId = python_coupling::blockDataIDFromString( *bs, blockDataName );
+      if ( bs->begin() == bs->end() ) {
+         // bs iterates over no blocks, so there is no block to dispatch the field type on: return an arbitrary (GPUField<real_t>) MPIDatatypeInfo
+         return createMPIDatatypeInfoToObject< GPUField<real_t> > ( bdId, numberOfGhostLayers );
+      }
+
+      IBlock * firstBlock =  & ( * bs->begin() ); // only the first block is handed to the dispatcher
+      python_coupling::Dispatcher<GpuFields, Exporter_createMPIDatatypeInfoToObject > dispatcher( firstBlock ); // NOTE(review): presumably picks the GpuFields entry matching the data stored under bdId - confirm
+      return dispatcher( bdId )( bdId, numberOfGhostLayers );
+   }
+
+
 
 } // namespace internal
 
 
 
 
-template<typename FieldTypes >
+template<typename GpuFields >
 void exportModuleToPython()
 {
    python_coupling::ModuleScope fieldModule( "cuda" );
 
    using namespace boost::python;
 
-   boost::mpl::for_each<FieldTypes>( internal::GpuFieldExporter() );
+   python_coupling::for_each_noncopyable_type<GpuFields>( internal::GpuFieldExporter() );
 
-   def( "createGpuField", &internal::createPythonGpuField<FieldTypes>, ( ( arg("size")                     ),
+   def( "createGpuField", &internal::createPythonGpuField<GpuFields>, ( ( arg("size")                     ),
                                                                          ( arg("type")                     ),
                                                                          ( arg("ghostLayers") = uint_t(1)  ),
                                                                          ( arg("layout")      = field::zyxf),
                                                                          ( arg("usePitchedMem") = true     )  ) );
 
 
-   def( "addGpuFieldToStorage",  &internal::addToStorage<FieldTypes>, ( ( arg("blocks")                  ),
+   def( "addGpuFieldToStorage",  &internal::addToStorage<GpuFields>, ( ( arg("blocks")                  ),
                                                                         ( arg("name")                    ),
                                                                         ( arg("type")                    ),
                                                                         ( arg("fSize")       = 1         ),
@@ -273,6 +345,9 @@ void exportModuleToPython()
                                                                         ( arg("layout")      = field::zyxf      ),
                                                                         ( arg("usePitchedMem") = object()  ) ) );
 
+   def( "createMPIDatatypeInfo",&internal::createMPIDatatypeInfo<GpuFields>, ( arg("blocks"), arg("blockDataName"), arg("numberOfGhostLayers" ) =0 ) );
+   def( "createPackInfo",       &internal::createPackInfo<GpuFields>,        ( arg("blocks"), arg("blockDataName"), arg("numberOfGhostLayers" ) =0 ) );
+
 }
 
 
-- 
GitLab