From ba5733cc15a297aa142739c550cc961c7f5bd9a1 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Tue, 30 May 2017 09:54:08 +0200 Subject: [PATCH] Python export for GPUFields and interface to pycuda --- apps/pythonmodule/CMakeLists.txt | 13 +- apps/pythonmodule/PythonModule.cpp | 13 + python/waLBerla/__init__.py | 5 +- python/waLBerla/cuda_extension.py | 15 + src/cuda/AddGPUFieldToStorage.impl.h | 1 + src/cuda/CMakeLists.txt | 2 +- src/cuda/FieldIndexing.h | 1 + ...FieldIndexing.cpp => FieldIndexing.impl.h} | 5 - src/cuda/FieldIndexing3D.h | 1 + ...dIndexing3D.cpp => FieldIndexing3D.impl.h} | 5 - src/cuda/FieldIndexingXYZ.h | 1 + ...ndexingXYZ.cpp => FieldIndexingXYZ.impl.h} | 3 - src/cuda/GPUField.h | 21 +- src/cuda/{GPUField.cpp => GPUField.impl.h} | 19 +- src/cuda/GPUTypesExplicitInstantiation.h | 8 - src/cuda/python/Exports.h | 43 +++ src/cuda/python/Exports.impl.h | 360 ++++++++++++++++++ src/waLBerlaDefinitions.in.h | 2 + 18 files changed, 483 insertions(+), 35 deletions(-) create mode 100644 python/waLBerla/cuda_extension.py rename src/cuda/{FieldIndexing.cpp => FieldIndexing.impl.h} (98%) rename src/cuda/{FieldIndexing3D.cpp => FieldIndexing3D.impl.h} (98%) rename src/cuda/{FieldIndexingXYZ.cpp => FieldIndexingXYZ.impl.h} (97%) rename src/cuda/{GPUField.cpp => GPUField.impl.h} (95%) delete mode 100644 src/cuda/GPUTypesExplicitInstantiation.h create mode 100644 src/cuda/python/Exports.h create mode 100644 src/cuda/python/Exports.impl.h diff --git a/apps/pythonmodule/CMakeLists.txt b/apps/pythonmodule/CMakeLists.txt index b52d000da..b4d92d772 100644 --- a/apps/pythonmodule/CMakeLists.txt +++ b/apps/pythonmodule/CMakeLists.txt @@ -2,13 +2,18 @@ if ( WALBERLA_BUILD_WITH_PYTHON_MODULE ) - + + set(PYTHON_MODULE_DEPENDENCIES blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk) + if (WALBERLA_BUILD_WITH_CUDA) + set(PYTHON_MODULE_DEPENDENCIES ${PYTHON_MODULE_DEPENDENCIES} cuda) + endif() + 
if( WALBERLA_CXX_COMPILER_IS_MSVC ) - set ( pythonModules blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk) + set ( pythonModules ${PYTHON_MODULE_DEPENDENCIES}) elseif( APPLE ) - set ( pythonModules "-Wl,-force_load" blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk) + set ( pythonModules "-Wl,-force_load" ${PYTHON_MODULE_DEPENDENCIES}) else() - set ( pythonModules "-Wl,-whole-archive" blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk "-Wl,-no-whole-archive" ) + set ( pythonModules "-Wl,-whole-archive" ${PYTHON_MODULE_DEPENDENCIES} "-Wl,-no-whole-archive" ) endif() if( WALBERLA_BUILD_WITH_PYTHON_LBM ) diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp index 9d6791c0a..82fab2e04 100644 --- a/apps/pythonmodule/PythonModule.cpp +++ b/apps/pythonmodule/PythonModule.cpp @@ -30,10 +30,15 @@ #include "timeloop/python/Exports.h" #include "vtk/python/Exports.h" +#ifdef WALBERLA_BUILD_WITH_CUDA +#include "cuda/python/Exports.h" +#endif + #include <boost/mpl/vector.hpp> #include <boost/mpl/insert_range.hpp> + namespace bmpl = boost::mpl; using namespace walberla; @@ -111,6 +116,14 @@ struct InitObject // Timeloop pythonManager->addExporterFunction( timeloop::exportModuleToPython ); +#ifdef WALBERLA_BUILD_WITH_CUDA + using walberla::cuda::GPUField; + typedef bmpl::vector<GPUField<double>, GPUField<float>, GPUField<int>, GPUField<uint8_t>, GPUField<uint16_t> > GPUFields; + + pythonManager->addExporterFunction( cuda::exportModuleToPython<GPUFields> ); + pythonManager->addBlockDataConversion<GPUFields>(); +#endif + python_coupling::initWalberlaForPythonModule(); } }; diff --git a/python/waLBerla/__init__.py b/python/waLBerla/__init__.py index 622f88ab7..93244d0aa 100644 --- a/python/waLBerla/__init__.py +++ b/python/waLBerla/__init__.py @@ -28,7 +28,10 @@ if 
cpp_available: # extend the C++ module with some python functions from .field_extension import extend as extend_field extend_field( field ) - + if 'cuda' in globals(): + sys.modules[__name__ + '.cuda'] = cuda + from .cuda_extension import extend as extend_cuda + extend_cuda( cuda ) if 'geometry' in globals(): sys.modules[__name__ + '.geometry'] = geometry if 'lbm' in globals(): diff --git a/python/waLBerla/cuda_extension.py b/python/waLBerla/cuda_extension.py new file mode 100644 index 000000000..be218d116 --- /dev/null +++ b/python/waLBerla/cuda_extension.py @@ -0,0 +1,15 @@ +from pycuda.gpuarray import GPUArray +import numpy as np + +def toGpuArray(f): + """Converts a waLBerla GPUField to a pycuda GPUArray""" + if not f: + return None + dtype = np.dtype(f.dtypeStr) + strides = [dtype.itemsize*a for a in f.strides] + return GPUArray(f.sizeWithGhostLayers, dtype, gpudata=f.ptr, strides=strides) + + +def extend(cppCudaModule): + cppCudaModule.toGpuArray = toGpuArray + diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h index f007181a6..03b90c728 100644 --- a/src/cuda/AddGPUFieldToStorage.impl.h +++ b/src/cuda/AddGPUFieldToStorage.impl.h @@ -21,6 +21,7 @@ #pragma once +#include "cuda/FieldCopy.h" namespace walberla { namespace cuda { diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index a4c149c36..83db21519 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -4,6 +4,6 @@ # ################################################################################################### -waLBerla_add_module( DEPENDS core communication domain_decomposition field stencil BUILD_ONLY_IF_FOUND CUDA ) +waLBerla_add_module( DEPENDS core communication domain_decomposition python_coupling field stencil BUILD_ONLY_IF_FOUND CUDA ) ################################################################################################### \ No newline at end of file diff --git a/src/cuda/FieldIndexing.h b/src/cuda/FieldIndexing.h 
index 653a5de27..7ed089f4b 100644 --- a/src/cuda/FieldIndexing.h +++ b/src/cuda/FieldIndexing.h @@ -91,4 +91,5 @@ namespace cuda { } // namespace cuda } // namespace walberla +#include "FieldIndexing.impl.h" diff --git a/src/cuda/FieldIndexing.cpp b/src/cuda/FieldIndexing.impl.h similarity index 98% rename from src/cuda/FieldIndexing.cpp rename to src/cuda/FieldIndexing.impl.h index 413bbe1aa..c4837d3c1 100644 --- a/src/cuda/FieldIndexing.cpp +++ b/src/cuda/FieldIndexing.impl.h @@ -20,7 +20,6 @@ //====================================================================================================================== #include "FieldIndexing.h" -#include "GPUTypesExplicitInstantiation.h" #include "GPUField.h" #include "core/cell/CellInterval.h" @@ -224,10 +223,6 @@ FieldIndexing<T> FieldIndexing<T>::all ( const GPUField<T> & f, const cell::Cell - -GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing ) - - } // namespace cuda } // namespace walberla diff --git a/src/cuda/FieldIndexing3D.h b/src/cuda/FieldIndexing3D.h index 0dbe97566..c6637ec6c 100644 --- a/src/cuda/FieldIndexing3D.h +++ b/src/cuda/FieldIndexing3D.h @@ -103,3 +103,4 @@ namespace cuda { } // namespace walberla +#include "FieldIndexing3D.impl.h" \ No newline at end of file diff --git a/src/cuda/FieldIndexing3D.cpp b/src/cuda/FieldIndexing3D.impl.h similarity index 98% rename from src/cuda/FieldIndexing3D.cpp rename to src/cuda/FieldIndexing3D.impl.h index 5a797a23d..896f7e1d2 100644 --- a/src/cuda/FieldIndexing3D.cpp +++ b/src/cuda/FieldIndexing3D.impl.h @@ -20,7 +20,6 @@ //====================================================================================================================== #include "FieldIndexing3D.h" -#include "GPUTypesExplicitInstantiation.h" #include "GPUField.h" #include "core/cell/CellInterval.h" @@ -165,10 +164,6 @@ FieldIndexing3D<T> FieldIndexing3D<T>::intervalXYZ( const GPUField<T> & f, const - -GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing3D ) - - } // namespace cuda } // namespace 
walberla diff --git a/src/cuda/FieldIndexingXYZ.h b/src/cuda/FieldIndexingXYZ.h index 2c25975ea..18a6e2645 100644 --- a/src/cuda/FieldIndexingXYZ.h +++ b/src/cuda/FieldIndexingXYZ.h @@ -77,3 +77,4 @@ template< typename T> class GPUField; } // namespace walberla +#include "FieldIndexingXYZ.impl.h" \ No newline at end of file diff --git a/src/cuda/FieldIndexingXYZ.cpp b/src/cuda/FieldIndexingXYZ.impl.h similarity index 97% rename from src/cuda/FieldIndexingXYZ.cpp rename to src/cuda/FieldIndexingXYZ.impl.h index 8cc0bd638..c8ec561f9 100644 --- a/src/cuda/FieldIndexingXYZ.cpp +++ b/src/cuda/FieldIndexingXYZ.impl.h @@ -20,7 +20,6 @@ //====================================================================================================================== #include "FieldIndexingXYZ.h" -#include "GPUTypesExplicitInstantiation.h" #include "GPUField.h" #include "core/cell/CellInterval.h" @@ -114,8 +113,6 @@ FieldIndexingXYZ<T> FieldIndexingXYZ<T>::withGhostLayerXYZ( const GPUField<T> & } -GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexingXYZ ) - } // namespace cuda } // namespace walberla diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h index 3153aba60..437fe1c95 100755 --- a/src/cuda/GPUField.h +++ b/src/cuda/GPUField.h @@ -79,11 +79,27 @@ namespace cuda { inline uint_t zSize() const { return zSize_; } inline uint_t fSize() const { return fSize_; } inline uint_t size() const { return fSize() * xSize() * ySize() * zSize(); } + inline uint_t size( uint_t coord ) const; + + inline uint_t xSizeWithGhostLayer() const { return xSize() + uint_c(2)*nrOfGhostLayers_; } + inline uint_t ySizeWithGhostLayer() const { return ySize() + uint_c(2)*nrOfGhostLayers_; } + inline uint_t zSizeWithGhostLayer() const { return zSize() + uint_c(2)*nrOfGhostLayers_; } + inline uint_t sizeWithGhostLayer(uint_t i) const { return i==3 ? 
fSize_ : + size(i) + uint_c(2)*nrOfGhostLayers_; } cell_idx_t xOff() const { return cell_idx_c( nrOfGhostLayers_ ); } cell_idx_t yOff() const { return cell_idx_c( nrOfGhostLayers_ ); } cell_idx_t zOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + cell_idx_t xStride() const { return (layout_ == fzyx) ? cell_idx_t(1) : + cell_idx_c(fAllocSize()); } + cell_idx_t yStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize()) : + cell_idx_c(fAllocSize() * xAllocSize()); } + cell_idx_t zStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize() * yAllocSize()) : + cell_idx_c(fAllocSize() * xAllocSize() * yAllocSize()); } + cell_idx_t fStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize() * yAllocSize() * zAllocSize()) : + cell_idx_c(1); } + uint_t xAllocSize() const; uint_t yAllocSize() const; @@ -91,8 +107,8 @@ namespace cuda { uint_t fAllocSize() const; inline uint_t allocSize() const { return fAllocSize() * xAllocSize() * yAllocSize() * zAllocSize(); } - inline bool hasSameAllocSize( const GPUField<T> & other ) const; - inline bool hasSameSize( const GPUField<T> & other ) const; + bool hasSameAllocSize( const GPUField<T> & other ) const; + bool hasSameSize( const GPUField<T> & other ) const; GPUField<T> * cloneUninitialized() const; @@ -133,3 +149,4 @@ namespace cuda { } // namespace walberla +#include "GPUField.impl.h" \ No newline at end of file diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.impl.h similarity index 95% rename from src/cuda/GPUField.cpp rename to src/cuda/GPUField.impl.h index 8d2b51ed4..b6fe3f8a8 100644 --- a/src/cuda/GPUField.cpp +++ b/src/cuda/GPUField.impl.h @@ -21,7 +21,6 @@ #include "GPUField.h" #include "ErrorChecking.h" -#include "GPUTypesExplicitInstantiation.h" #include "core/logging/Logging.h" @@ -124,12 +123,23 @@ void GPUField<T>::getSlice(stencil::Direction d, CellInterval & ci, } } +template<typename T> +inline uint_t GPUField<T>::size( uint_t coord ) const +{ + switch (coord) { + case 0: 
return this->xSize(); + case 1: return this->ySize(); + case 2: return this->zSize(); + case 3: return this->fSize(); + default: WALBERLA_ASSERT(false); return 0; + } +} //******************************************************************************************************************* /*! True if sizes of all dimensions match *******************************************************************************************************************/ template<typename T> -inline bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const +bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const { return xSize() == other.xSize() && ySize() == other.ySize() && @@ -140,7 +150,7 @@ inline bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const /*! True if allocation sizes of all dimensions match *******************************************************************************************************************/ template<typename T> -inline bool GPUField<T>::hasSameAllocSize( const GPUField<T> & other ) const +bool GPUField<T>::hasSameAllocSize( const GPUField<T> & other ) const { return xAllocSize() == other.xAllocSize() && yAllocSize() == other.yAllocSize() && @@ -238,9 +248,6 @@ void GPUField<T>::swapDataPointers( GPUField<T> & other ) -GPU_CLASS_TEMPLATE_INSTANTIATION( GPUField ) - - } // namespace cuda } // namespace walberla diff --git a/src/cuda/GPUTypesExplicitInstantiation.h b/src/cuda/GPUTypesExplicitInstantiation.h deleted file mode 100644 index bdc4b5846..000000000 --- a/src/cuda/GPUTypesExplicitInstantiation.h +++ /dev/null @@ -1,8 +0,0 @@ -#define GPU_CLASS_TEMPLATE_INSTANTIATION(ClassName)\ - template class ClassName< double >;\ - template class ClassName< float >;\ - template class ClassName< int >;\ - template class ClassName< uint8_t >;\ - template class ClassName< uint16_t >; - - diff --git a/src/cuda/python/Exports.h b/src/cuda/python/Exports.h new file mode 100644 index 000000000..a2990cfc2 --- /dev/null +++ b/src/cuda/python/Exports.h 
@@ -0,0 +1,43 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldExport.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#ifdef WALBERLA_BUILD_WITH_PYTHON + + +#include <string> + +namespace walberla { +namespace cuda { + + + template<typename GpuFields > + void exportModuleToPython(); + + +} // namespace cuda +} // namespace walberla + +#include "Exports.impl.h" + + +#endif //WALBERLA_BUILD_WITH_PYTHON diff --git a/src/cuda/python/Exports.impl.h b/src/cuda/python/Exports.impl.h new file mode 100644 index 000000000..1d2e2dc23 --- /dev/null +++ b/src/cuda/python/Exports.impl.h @@ -0,0 +1,360 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldExport.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +// Do not reorder includes - the include order is important +#include "python_coupling/PythonWrapper.h" + +#include "core/logging/Logging.h" +#include "cuda/GPUField.h" +#include "cuda/communication/GPUPackInfo.h" +#include "cuda/AddGPUFieldToStorage.h" + +#include "field/communication/UniformMPIDatatypeInfo.h" + +#include "field/AddToStorage.h" +#include "field/python/FieldExport.h" + +#include "python_coupling/helper/MplHelpers.h" +#include "python_coupling/helper/BoostPythonHelpers.h" + +#include <boost/type_traits/is_unsigned.hpp> + +#include <iostream> +#include <cuda/communication/GPUPackInfo.h> + +namespace walberla { +namespace cuda { + + + +namespace internal { + + //=================================================================================================================== + // + // Field export + // + //=================================================================================================================== + + template<typename GpuField_T> + uint64_t gpufield_ptr(const GpuField_T & gpuField) + { + return reinterpret_cast<uint64_t>(gpuField.pitchedPtr().ptr); + } + + template<typename GpuField_T> + std::string gpufield_dtypeStr(const GpuField_T & ) + { + return std::string(field::internal::PythonFormatString<typename GpuField_T::value_type>::get()); + } + + struct GpuFieldExporter + { + template< typename 
GpuField_T> + void operator() ( python_coupling::NonCopyableWrap<GpuField_T> ) + { + using namespace boost::python; + + class_<GpuField_T, shared_ptr<GpuField_T>, boost::noncopyable>( "GpuField", no_init ) + .add_property("layout", &field::internal::field_layout < GpuField_T > ) + .add_property("size", &field::internal::field_size < GpuField_T > ) + .add_property("sizeWithGhostLayers", &field::internal::field_sizeWithGhostLayer< GpuField_T > ) + .add_property("allocSize", &field::internal::field_allocSize < GpuField_T > ) + .add_property("strides", &field::internal::field_strides < GpuField_T > ) + .add_property("offsets", &field::internal::field_offsets < GpuField_T > ) + .add_property("ptr", &gpufield_ptr < GpuField_T > ) + .add_property("dtypeStr", &gpufield_dtypeStr < GpuField_T > ) + .def("swapDataPointers", &field::internal::field_swapDataPointers < GpuField_T > ) + .add_property("nrOfGhostLayers", &GpuField_T::nrOfGhostLayers ) + .def("cloneUninitialized", &GpuField_T::cloneUninitialized, return_value_policy<manage_new_object>()) + ; + + + using field::communication::PackInfo; + using communication::GPUPackInfo; + class_< GPUPackInfo<GpuField_T>, + shared_ptr< GPUPackInfo<GpuField_T> >, + bases<walberla::communication::UniformPackInfo>, + boost::noncopyable >( "GpuFieldPackInfo", no_init ); + + + using field::communication::UniformMPIDatatypeInfo; + class_< UniformMPIDatatypeInfo<GpuField_T>, + shared_ptr< UniformMPIDatatypeInfo<GpuField_T> >, + bases<walberla::communication::UniformMPIDatatypeInfo>, + boost::noncopyable >( "GpuFieldMPIDataTypeInfo", no_init ); + + } + }; + + + //=================================================================================================================== + // + // createField + // + //=================================================================================================================== + + class CreateFieldExporter + { + public: + CreateFieldExporter( uint_t xs, uint_t ys, uint_t zs, uint_t fs, uint_t gl, 
+ Layout layout, const boost::python::object & type, bool usePitchedMem, + const shared_ptr<boost::python::object> & resultPointer ) + : xs_( xs ), ys_(ys), zs_(zs), fs_(fs), gl_(gl), + layout_( layout), type_( type ), usePitchedMem_( usePitchedMem ) , resultPointer_( resultPointer ) + {} + + template< typename GpuField_T> + void operator() ( python_coupling::NonCopyableWrap<GpuField_T> ) + { + using namespace boost::python; + typedef typename GpuField_T::value_type T; + if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() ) ) + { + *resultPointer_ = object( make_shared< GPUField<T> >( xs_,ys_,zs_, fs_, gl_, layout_, usePitchedMem_ ) ); + } + } + + private: + uint_t xs_; + uint_t ys_; + uint_t zs_; + uint_t fs_; + uint_t gl_; + Layout layout_; + boost::python::object type_; + bool usePitchedMem_; + shared_ptr<boost::python::object> resultPointer_; + }; + + template<typename GpuFields> + boost::python::object createPythonGpuField( boost::python::list size, + boost::python::object type, + uint_t ghostLayers, + Layout layout, + bool usePitchedMem) + { + using namespace boost::python; + uint_t xSize = extract<uint_t> ( size[0] ); + uint_t ySize = extract<uint_t> ( size[1] ); + uint_t zSize = extract<uint_t> ( size[2] ); + uint_t sizeLen = uint_c( len( size ) ); + uint_t fSize = 1; + if ( sizeLen == 4 ) + fSize = extract<uint_t> ( size[3] ); + + if ( ! 
PyType_Check( type.ptr() ) ) { + PyErr_SetString( PyExc_RuntimeError, "Invalid 'type' parameter"); + throw error_already_set(); + } + + auto result = make_shared<boost::python::object>(); + CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, usePitchedMem, result ); + python_coupling::for_each_noncopyable_type< GpuFields >( exporter ); + + if ( *result == object() ) + { + PyErr_SetString( PyExc_ValueError, "Cannot create field of this type"); + throw error_already_set(); + } + else { + return *result; + } + } + + + //=================================================================================================================== + // + // addToStorage + // + //=================================================================================================================== + + class AddToStorageExporter + { + public: + AddToStorageExporter( const shared_ptr<StructuredBlockStorage> & blocks, + const std::string & name, uint_t fs, uint_t gl, Layout layout, + const boost::python::object & type, + bool usePitchedMem ) + : blocks_( blocks ), name_( name ), fs_( fs ), + gl_(gl),layout_( layout), type_( type ), usePitchedMem_(usePitchedMem), found_(false) + {} + + template< typename GpuField_T> + void operator() ( python_coupling::NonCopyableWrap<GpuField_T> ) + { + typedef typename GpuField_T::value_type T; + if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() ) ) + { + WALBERLA_ASSERT(!found_); + addGPUFieldToStorage<GPUField<T> >(blocks_, name_, fs_, layout_, gl_, usePitchedMem_); + found_ = true; + } + } + + bool successful() const { return found_; } + private: + shared_ptr< StructuredBlockStorage > blocks_; + std::string name_; + uint_t fs_; + uint_t gl_; + Layout layout_; + boost::python::object type_; + bool usePitchedMem_; + bool found_; + }; + + template<typename GpuFields> + void addToStorage( const shared_ptr<StructuredBlockStorage> & blocks, const std::string & name, + boost::python::object type, 
uint_t fs, uint_t gl, Layout layout, bool usePitchedMem ) + { + using namespace boost::python; + + if ( ! PyType_Check( type.ptr() ) ) { + PyErr_SetString( PyExc_RuntimeError, "Invalid 'type' parameter"); + throw error_already_set(); + } + + auto result = make_shared<boost::python::object>(); + AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, usePitchedMem ); + python_coupling::for_each_noncopyable_type<GpuFields>( boost::ref(exporter) ); + + if ( ! exporter.successful() ) { + PyErr_SetString( PyExc_ValueError, "Adding Field failed."); + throw error_already_set(); + } + } + + + //=================================================================================================================== + // + // createPackInfo Export + // + //=================================================================================================================== + + template< typename GPUField_T > + boost::python::object createGPUPackInfoToObject( BlockDataID bdId, uint_t numberOfGhostLayers ) + { + using cuda::communication::GPUPackInfo; + if ( numberOfGhostLayers > 0 ) + return boost::python::object( make_shared< GPUPackInfo<GPUField_T> >( bdId, numberOfGhostLayers ) ); + else + return boost::python::object( make_shared< GPUPackInfo<GPUField_T> >( bdId ) ); + } + + FunctionExporterClass( createGPUPackInfoToObject, boost::python::object( BlockDataID, uint_t ) ); + + template< typename GpuFields> + boost::python::object createPackInfo( const shared_ptr<StructuredBlockStorage> & bs, + const std::string & blockDataName, uint_t numberOfGhostLayers ) + { + using cuda::communication::GPUPackInfo; + + auto bdId = python_coupling::blockDataIDFromString( *bs, blockDataName ); + if ( bs->begin() == bs->end() ) { + // if no blocks are on this field an arbitrary PackInfo can be returned + return createGPUPackInfoToObject< GPUField<real_t> > ( bdId, numberOfGhostLayers ); + } + + IBlock * firstBlock = & ( * bs->begin() ); + python_coupling::Dispatcher<GpuFields, 
Exporter_createGPUPackInfoToObject > dispatcher( firstBlock ); + return dispatcher( bdId )( bdId, numberOfGhostLayers ) ; + } + + + //=================================================================================================================== + // + // createMPIDatatypeInfo + // + //=================================================================================================================== + + + template< typename GpuField_T > + boost::python::object createMPIDatatypeInfoToObject( BlockDataID bdId, uint_t numberOfGhostLayers ) + { + using field::communication::UniformMPIDatatypeInfo; + if ( numberOfGhostLayers > 0 ) + return boost::python::object( make_shared< UniformMPIDatatypeInfo<GpuField_T> >( bdId, numberOfGhostLayers ) ); + else + return boost::python::object( make_shared< UniformMPIDatatypeInfo<GpuField_T> >( bdId ) ); + } + + FunctionExporterClass( createMPIDatatypeInfoToObject, boost::python::object( BlockDataID, uint_t ) ); + + template< typename GpuFields> + boost::python::object createMPIDatatypeInfo( const shared_ptr<StructuredBlockStorage> & bs, + const std::string & blockDataName, + uint_t numberOfGhostLayers) + { + auto bdId = python_coupling::blockDataIDFromString( *bs, blockDataName ); + if ( bs->begin() == bs->end() ) { + // if no blocks are on this field an arbitrary MPIDatatypeInfo can be returned + return createMPIDatatypeInfoToObject< GPUField<real_t> > ( bdId, numberOfGhostLayers ); + } + + IBlock * firstBlock = & ( * bs->begin() ); + python_coupling::Dispatcher<GpuFields, Exporter_createMPIDatatypeInfoToObject > dispatcher( firstBlock ); + return dispatcher( bdId )( bdId, numberOfGhostLayers ); + } + + + +} // namespace internal + + + + +template<typename GpuFields > +void exportModuleToPython() +{ + python_coupling::ModuleScope fieldModule( "cuda" ); + + using namespace boost::python; + + python_coupling::for_each_noncopyable_type<GpuFields>( internal::GpuFieldExporter() ); + + def( "createGpuField", 
&internal::createPythonGpuField<GpuFields>, ( ( arg("size") ), + ( arg("type") ), + ( arg("ghostLayers") = uint_t(1) ), + ( arg("layout") = field::zyxf), + ( arg("usePitchedMem") = true ) ) ); + + + def( "addGpuFieldToStorage", &internal::addToStorage<GpuFields>, ( ( arg("blocks") ), + ( arg("name") ), + ( arg("type") ), + ( arg("fSize") = 1 ), + ( arg("ghostLayers") = uint_t(1) ), + ( arg("layout") = field::zyxf ), + ( arg("usePitchedMem") = object() ) ) ); + + def( "createMPIDatatypeInfo",&internal::createMPIDatatypeInfo<GpuFields>, ( arg("blocks"), arg("blockDataName"), arg("numberOfGhostLayers" ) =0 ) ); + def( "createPackInfo", &internal::createPackInfo<GpuFields>, ( arg("blocks"), arg("blockDataName"), arg("numberOfGhostLayers" ) =0 ) ); + +} + + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/waLBerlaDefinitions.in.h b/src/waLBerlaDefinitions.in.h index ed587958e..ce7d276a1 100644 --- a/src/waLBerlaDefinitions.in.h +++ b/src/waLBerlaDefinitions.in.h @@ -29,6 +29,8 @@ #cmakedefine WALBERLA_BUILD_WITH_OPENMESH +#cmakedefine WALBERLA_BUILD_WITH_CUDA + #cmakedefine WALBERLA_BUFFER_DEBUG #cmakedefine WALBERLA_THREAD_SAFE_LOGGING -- GitLab