From 5f8068294d4acae46259644e9118b6369b349f6b Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Tue, 30 May 2017 13:39:01 +0200 Subject: [PATCH] CUDA: removed explicit instantiation of GPUFields - easier Python export --- apps/pythonmodule/PythonModule.cpp | 7 +- src/cuda/FieldIndexing.h | 1 + ...FieldIndexing.cpp => FieldIndexing.impl.h} | 5 - src/cuda/FieldIndexing3D.h | 1 + ...dIndexing3D.cpp => FieldIndexing3D.impl.h} | 5 - src/cuda/FieldIndexingXYZ.h | 1 + ...ndexingXYZ.cpp => FieldIndexingXYZ.impl.h} | 3 - src/cuda/GPUField.h | 1 + src/cuda/{GPUField.cpp => GPUField.impl.h} | 4 - src/cuda/GPUTypesExplicitInstantiation.h | 8 -- src/cuda/python/Exports.h | 2 +- src/cuda/python/Exports.impl.h | 109 +++++++++++++++--- 12 files changed, 102 insertions(+), 45 deletions(-) rename src/cuda/{FieldIndexing.cpp => FieldIndexing.impl.h} (98%) rename src/cuda/{FieldIndexing3D.cpp => FieldIndexing3D.impl.h} (98%) rename src/cuda/{FieldIndexingXYZ.cpp => FieldIndexingXYZ.impl.h} (97%) rename src/cuda/{GPUField.cpp => GPUField.impl.h} (99%) delete mode 100644 src/cuda/GPUTypesExplicitInstantiation.h diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp index a648d08c..82fab2e0 100644 --- a/apps/pythonmodule/PythonModule.cpp +++ b/apps/pythonmodule/PythonModule.cpp @@ -67,7 +67,6 @@ typedef bmpl::vector< Field<walberla::uint32_t,1> > FieldTypes; -typedef bmpl::vector<double, float, int, uint8_t, uint16_t>CudaFieldTypes; typedef bmpl::vector< GhostLayerField<walberla::real_t,1>, @@ -118,7 +117,11 @@ struct InitObject pythonManager->addExporterFunction( timeloop::exportModuleToPython ); #ifdef WALBERLA_BUILD_WITH_CUDA - pythonManager->addExporterFunction( cuda::exportModuleToPython<CudaFieldTypes> ); + using walberla::cuda::GPUField; + typedef bmpl::vector<GPUField<double>, GPUField<float>, GPUField<int>, GPUField<uint8_t>, GPUField<uint16_t> > GPUFields; + + pythonManager->addExporterFunction( 
cuda::exportModuleToPython<GPUFields> ); + pythonManager->addBlockDataConversion<GPUFields>(); #endif python_coupling::initWalberlaForPythonModule(); diff --git a/src/cuda/FieldIndexing.h b/src/cuda/FieldIndexing.h index 653a5de2..7ed089f4 100644 --- a/src/cuda/FieldIndexing.h +++ b/src/cuda/FieldIndexing.h @@ -91,4 +91,5 @@ namespace cuda { } // namespace cuda } // namespace walberla +#include "FieldIndexing.impl.h" diff --git a/src/cuda/FieldIndexing.cpp b/src/cuda/FieldIndexing.impl.h similarity index 98% rename from src/cuda/FieldIndexing.cpp rename to src/cuda/FieldIndexing.impl.h index 413bbe1a..c4837d3c 100644 --- a/src/cuda/FieldIndexing.cpp +++ b/src/cuda/FieldIndexing.impl.h @@ -20,7 +20,6 @@ //====================================================================================================================== #include "FieldIndexing.h" -#include "GPUTypesExplicitInstantiation.h" #include "GPUField.h" #include "core/cell/CellInterval.h" @@ -224,10 +223,6 @@ FieldIndexing<T> FieldIndexing<T>::all ( const GPUField<T> & f, const cell::Cell - -GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing ) - - } // namespace cuda } // namespace walberla diff --git a/src/cuda/FieldIndexing3D.h b/src/cuda/FieldIndexing3D.h index 0dbe9756..c6637ec6 100644 --- a/src/cuda/FieldIndexing3D.h +++ b/src/cuda/FieldIndexing3D.h @@ -103,3 +103,4 @@ namespace cuda { } // namespace walberla +#include "FieldIndexing3D.impl.h" \ No newline at end of file diff --git a/src/cuda/FieldIndexing3D.cpp b/src/cuda/FieldIndexing3D.impl.h similarity index 98% rename from src/cuda/FieldIndexing3D.cpp rename to src/cuda/FieldIndexing3D.impl.h index 5a797a23..896f7e1d 100644 --- a/src/cuda/FieldIndexing3D.cpp +++ b/src/cuda/FieldIndexing3D.impl.h @@ -20,7 +20,6 @@ //====================================================================================================================== #include "FieldIndexing3D.h" -#include "GPUTypesExplicitInstantiation.h" #include "GPUField.h" #include 
"core/cell/CellInterval.h" @@ -165,10 +164,6 @@ FieldIndexing3D<T> FieldIndexing3D<T>::intervalXYZ( const GPUField<T> & f, const - -GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing3D ) - - } // namespace cuda } // namespace walberla diff --git a/src/cuda/FieldIndexingXYZ.h b/src/cuda/FieldIndexingXYZ.h index 2c25975e..18a6e264 100644 --- a/src/cuda/FieldIndexingXYZ.h +++ b/src/cuda/FieldIndexingXYZ.h @@ -77,3 +77,4 @@ template< typename T> class GPUField; } // namespace walberla +#include "FieldIndexingXYZ.impl.h" \ No newline at end of file diff --git a/src/cuda/FieldIndexingXYZ.cpp b/src/cuda/FieldIndexingXYZ.impl.h similarity index 97% rename from src/cuda/FieldIndexingXYZ.cpp rename to src/cuda/FieldIndexingXYZ.impl.h index 8cc0bd63..c8ec561f 100644 --- a/src/cuda/FieldIndexingXYZ.cpp +++ b/src/cuda/FieldIndexingXYZ.impl.h @@ -20,7 +20,6 @@ //====================================================================================================================== #include "FieldIndexingXYZ.h" -#include "GPUTypesExplicitInstantiation.h" #include "GPUField.h" #include "core/cell/CellInterval.h" @@ -114,8 +113,6 @@ FieldIndexingXYZ<T> FieldIndexingXYZ<T>::withGhostLayerXYZ( const GPUField<T> & } -GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexingXYZ ) - } // namespace cuda } // namespace walberla diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h index aa059b53..437fe1c9 100755 --- a/src/cuda/GPUField.h +++ b/src/cuda/GPUField.h @@ -149,3 +149,4 @@ namespace cuda { } // namespace walberla +#include "GPUField.impl.h" \ No newline at end of file diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.impl.h similarity index 99% rename from src/cuda/GPUField.cpp rename to src/cuda/GPUField.impl.h index fe7c3ed9..b6fe3f8a 100644 --- a/src/cuda/GPUField.cpp +++ b/src/cuda/GPUField.impl.h @@ -21,7 +21,6 @@ #include "GPUField.h" #include "ErrorChecking.h" -#include "GPUTypesExplicitInstantiation.h" #include "core/logging/Logging.h" @@ -249,9 +248,6 @@ void 
GPUField<T>::swapDataPointers( GPUField<T> & other ) -GPU_CLASS_TEMPLATE_INSTANTIATION( GPUField ) - - } // namespace cuda } // namespace walberla diff --git a/src/cuda/GPUTypesExplicitInstantiation.h b/src/cuda/GPUTypesExplicitInstantiation.h deleted file mode 100644 index bdc4b584..00000000 --- a/src/cuda/GPUTypesExplicitInstantiation.h +++ /dev/null @@ -1,8 +0,0 @@ -#define GPU_CLASS_TEMPLATE_INSTANTIATION(ClassName)\ - template class ClassName< double >;\ - template class ClassName< float >;\ - template class ClassName< int >;\ - template class ClassName< uint8_t >;\ - template class ClassName< uint16_t >; - - diff --git a/src/cuda/python/Exports.h b/src/cuda/python/Exports.h index f4cb588a..a2990cfc 100644 --- a/src/cuda/python/Exports.h +++ b/src/cuda/python/Exports.h @@ -30,7 +30,7 @@ namespace walberla { namespace cuda { - template<typename DataTypes > + template<typename GpuFields > void exportModuleToPython(); diff --git a/src/cuda/python/Exports.impl.h b/src/cuda/python/Exports.impl.h index 8cd714ed..1d2e2dc2 100644 --- a/src/cuda/python/Exports.impl.h +++ b/src/cuda/python/Exports.impl.h @@ -38,6 +38,7 @@ #include <boost/type_traits/is_unsigned.hpp> #include <iostream> +#include <cuda/communication/GPUPackInfo.h> namespace walberla { namespace cuda { @@ -66,11 +67,9 @@ namespace internal { struct GpuFieldExporter { - template< typename DataType> - void operator() ( DataType ) + template< typename GpuField_T> + void operator() ( python_coupling::NonCopyableWrap<GpuField_T> ) { - typedef GPUField<DataType> GpuField_T; - using namespace boost::python; class_<GpuField_T, shared_ptr<GpuField_T>, boost::noncopyable>( "GpuField", no_init ) @@ -122,11 +121,11 @@ namespace internal { layout_( layout), type_( type ), usePitchedMem_( usePitchedMem ) , resultPointer_( resultPointer ) {} - template< typename T> - void operator() ( T ) + template< typename GpuField_T> + void operator() ( python_coupling::NonCopyableWrap<GpuField_T> ) { using namespace boost::python; 
- + typedef typename GpuField_T::value_type T; if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() ) ) { *resultPointer_ = object( make_shared< GPUField<T> >( xs_,ys_,zs_, fs_, gl_, layout_, usePitchedMem_ ) ); @@ -145,7 +144,7 @@ namespace internal { shared_ptr<boost::python::object> resultPointer_; }; - template<typename DataTypes> + template<typename GpuFields> boost::python::object createPythonGpuField( boost::python::list size, boost::python::object type, uint_t ghostLayers, @@ -168,7 +167,7 @@ namespace internal { auto result = make_shared<boost::python::object>(); CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, usePitchedMem, result ); - boost::mpl::for_each< DataTypes > ( exporter ); + python_coupling::for_each_noncopyable_type< GpuFields >( exporter ); if ( *result == object() ) { @@ -198,9 +197,10 @@ namespace internal { gl_(gl),layout_( layout), type_( type ), usePitchedMem_(usePitchedMem), found_(false) {} - template< typename T> - void operator() ( T ) + template< typename GpuField_T> + void operator() ( python_coupling::NonCopyableWrap<GpuField_T> ) { + typedef typename GpuField_T::value_type T; if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() ) ) { WALBERLA_ASSERT(!found_); @@ -221,7 +221,7 @@ namespace internal { bool found_; }; - template<typename DataTypes> + template<typename GpuFields> void addToStorage( const shared_ptr<StructuredBlockStorage> & blocks, const std::string & name, boost::python::object type, uint_t fs, uint_t gl, Layout layout, bool usePitchedMem ) { @@ -234,7 +234,7 @@ namespace internal { auto result = make_shared<boost::python::object>(); AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, usePitchedMem ); - boost::mpl::for_each<DataTypes>( boost::ref(exporter) ); + python_coupling::for_each_noncopyable_type<GpuFields>( boost::ref(exporter) ); if ( ! 
exporter.successful() ) { PyErr_SetString( PyExc_ValueError, "Adding Field failed."); @@ -243,29 +243,101 @@ namespace internal { } + //=================================================================================================================== + // + // createPackInfo Export + // + //=================================================================================================================== + + template< typename GPUField_T > + boost::python::object createGPUPackInfoToObject( BlockDataID bdId, uint_t numberOfGhostLayers ) + { + using cuda::communication::GPUPackInfo; + if ( numberOfGhostLayers > 0 ) + return boost::python::object( make_shared< GPUPackInfo<GPUField_T> >( bdId, numberOfGhostLayers ) ); + else + return boost::python::object( make_shared< GPUPackInfo<GPUField_T> >( bdId ) ); + } + + FunctionExporterClass( createGPUPackInfoToObject, boost::python::object( BlockDataID, uint_t ) ); + + template< typename GpuFields> + boost::python::object createPackInfo( const shared_ptr<StructuredBlockStorage> & bs, + const std::string & blockDataName, uint_t numberOfGhostLayers ) + { + using cuda::communication::GPUPackInfo; + + auto bdId = python_coupling::blockDataIDFromString( *bs, blockDataName ); + if ( bs->begin() == bs->end() ) { + // if no blocks are on this field an arbitrary PackInfo can be returned + return createGPUPackInfoToObject< GPUField<real_t> > ( bdId, numberOfGhostLayers ); + } + + IBlock * firstBlock = & ( * bs->begin() ); + python_coupling::Dispatcher<GpuFields, Exporter_createGPUPackInfoToObject > dispatcher( firstBlock ); + return dispatcher( bdId )( bdId, numberOfGhostLayers ) ; + } + + + //=================================================================================================================== + // + // createMPIDatatypeInfo + // + //=================================================================================================================== + + + template< typename GpuField_T > + boost::python::object 
createMPIDatatypeInfoToObject( BlockDataID bdId, uint_t numberOfGhostLayers ) + { + using field::communication::UniformMPIDatatypeInfo; + if ( numberOfGhostLayers > 0 ) + return boost::python::object( make_shared< UniformMPIDatatypeInfo<GpuField_T> >( bdId, numberOfGhostLayers ) ); + else + return boost::python::object( make_shared< UniformMPIDatatypeInfo<GpuField_T> >( bdId ) ); + } + + FunctionExporterClass( createMPIDatatypeInfoToObject, boost::python::object( BlockDataID, uint_t ) ); + + template< typename GpuFields> + boost::python::object createMPIDatatypeInfo( const shared_ptr<StructuredBlockStorage> & bs, + const std::string & blockDataName, + uint_t numberOfGhostLayers) + { + auto bdId = python_coupling::blockDataIDFromString( *bs, blockDataName ); + if ( bs->begin() == bs->end() ) { + // if no blocks are on this field an arbitrary MPIDatatypeInfo can be returned + return createMPIDatatypeInfoToObject< GPUField<real_t> > ( bdId, numberOfGhostLayers ); + } + + IBlock * firstBlock = & ( * bs->begin() ); + python_coupling::Dispatcher<GpuFields, Exporter_createMPIDatatypeInfoToObject > dispatcher( firstBlock ); + return dispatcher( bdId )( bdId, numberOfGhostLayers ); + } + + } // namespace internal -template<typename FieldTypes > +template<typename GpuFields > void exportModuleToPython() { python_coupling::ModuleScope fieldModule( "cuda" ); using namespace boost::python; - boost::mpl::for_each<FieldTypes>( internal::GpuFieldExporter() ); + python_coupling::for_each_noncopyable_type<GpuFields>( internal::GpuFieldExporter() ); - def( "createGpuField", &internal::createPythonGpuField<FieldTypes>, ( ( arg("size") ), + def( "createGpuField", &internal::createPythonGpuField<GpuFields>, ( ( arg("size") ), ( arg("type") ), ( arg("ghostLayers") = uint_t(1) ), ( arg("layout") = field::zyxf), ( arg("usePitchedMem") = true ) ) ); - def( "addGpuFieldToStorage", &internal::addToStorage<FieldTypes>, ( ( arg("blocks") ), + def( "addGpuFieldToStorage", 
&internal::addToStorage<GpuFields>, ( ( arg("blocks") ), ( arg("name") ), ( arg("type") ), ( arg("fSize") = 1 ), @@ -273,6 +345,9 @@ void exportModuleToPython() ( arg("layout") = field::zyxf ), ( arg("usePitchedMem") = object() ) ) ); + def( "createMPIDatatypeInfo",&internal::createMPIDatatypeInfo<GpuFields>, ( arg("blocks"), arg("blockDataName"), arg("numberOfGhostLayers" ) =0 ) ); + def( "createPackInfo", &internal::createPackInfo<GpuFields>, ( arg("blocks"), arg("blockDataName"), arg("numberOfGhostLayers" ) =0 ) ); + } -- GitLab