From db0214ca77a2f19987d0377b1a829bac48702d29 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Tue, 30 May 2017 09:54:08 +0200 Subject: [PATCH] Python export for GPUFields and interface to pycuda --- apps/pythonmodule/CMakeLists.txt | 13 +- apps/pythonmodule/PythonModule.cpp | 10 + python/waLBerla/__init__.py | 5 +- python/waLBerla/cuda_extension.py | 15 ++ src/cuda/AddGPUFieldToStorage.impl.h | 1 + src/cuda/CMakeLists.txt | 2 +- src/cuda/GPUField.cpp | 15 +- src/cuda/GPUField.h | 20 +- src/cuda/python/Exports.h | 43 ++++ src/cuda/python/Exports.impl.h | 285 +++++++++++++++++++++++++++ src/waLBerlaDefinitions.in.h | 2 + 11 files changed, 401 insertions(+), 10 deletions(-) create mode 100644 python/waLBerla/cuda_extension.py create mode 100644 src/cuda/python/Exports.h create mode 100644 src/cuda/python/Exports.impl.h diff --git a/apps/pythonmodule/CMakeLists.txt b/apps/pythonmodule/CMakeLists.txt index b52d000d..b4d92d77 100644 --- a/apps/pythonmodule/CMakeLists.txt +++ b/apps/pythonmodule/CMakeLists.txt @@ -2,13 +2,18 @@ if ( WALBERLA_BUILD_WITH_PYTHON_MODULE ) - + + set(PYTHON_MODULE_DEPENDENCIES blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk) + if (WALBERLA_BUILD_WITH_CUDA) + set(PYTHON_MODULE_DEPENDENCIES ${PYTHON_MODULE_DEPENDENCIES} cuda) + endif() + if( WALBERLA_CXX_COMPILER_IS_MSVC ) - set ( pythonModules blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk) + set ( pythonModules ${PYTHON_MODULE_DEPENDENCIES}) elseif( APPLE ) - set ( pythonModules "-Wl,-force_load" blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk) + set ( pythonModules "-Wl,-force_load" ${PYTHON_MODULE_DEPENDENCIES}) else() - set ( pythonModules "-Wl,-whole-archive" blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk "-Wl,-no-whole-archive" ) + set ( pythonModules "-Wl,-whole-archive" ${PYTHON_MODULE_DEPENDENCIES} "-Wl,-no-whole-archive" ) endif() if( WALBERLA_BUILD_WITH_PYTHON_LBM ) diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp index 9d6791c0..a648d08c 100644 --- a/apps/pythonmodule/PythonModule.cpp +++ b/apps/pythonmodule/PythonModule.cpp @@ -30,10 +30,15 @@ #include "timeloop/python/Exports.h" #include "vtk/python/Exports.h" +#ifdef WALBERLA_BUILD_WITH_CUDA +#include "cuda/python/Exports.h" +#endif + #include <boost/mpl/vector.hpp> #include <boost/mpl/insert_range.hpp> + namespace bmpl = boost::mpl; using namespace walberla; @@ -62,6 +67,7 @@ typedef bmpl::vector< Field<walberla::uint32_t,1> > FieldTypes; +typedef bmpl::vector<double, float, int, uint8_t, uint16_t>CudaFieldTypes; typedef bmpl::vector< GhostLayerField<walberla::real_t,1>, @@ -111,6 +117,10 @@ struct InitObject // Timeloop pythonManager->addExporterFunction( timeloop::exportModuleToPython ); +#ifdef WALBERLA_BUILD_WITH_CUDA + pythonManager->addExporterFunction( cuda::exportModuleToPython<CudaFieldTypes> ); +#endif + python_coupling::initWalberlaForPythonModule(); } }; diff --git a/python/waLBerla/__init__.py b/python/waLBerla/__init__.py index 622f88ab..93244d0a 100644 --- a/python/waLBerla/__init__.py +++ b/python/waLBerla/__init__.py @@ -28,7 +28,10 @@ if cpp_available: # extend the C++ module with some python functions from .field_extension import extend as extend_field extend_field( field ) - + if 'cuda' in globals(): + sys.modules[__name__ + '.cuda'] = cuda + from .cuda_extension import extend as extend_cuda + extend_cuda( cuda ) if 'geometry' in globals(): sys.modules[__name__ + '.geometry'] = geometry if 'lbm' in globals(): diff --git a/python/waLBerla/cuda_extension.py b/python/waLBerla/cuda_extension.py new file mode 100644 index 00000000..be218d11 --- /dev/null +++ b/python/waLBerla/cuda_extension.py @@ -0,0 +1,15 @@ +from pycuda.gpuarray import GPUArray +import numpy as np + +def toGpuArray(f): + """Converts a waLBerla GPUField to a pycuda GPUArray""" + if not f: + return None + dtype = np.dtype(f.dtypeStr) + strides = [dtype.itemsize*a for a in f.strides] + return GPUArray(f.sizeWithGhostLayers, dtype, gpudata=f.ptr, strides=strides) + + +def extend(cppCudaModule): + cppCudaModule.toGpuArray = toGpuArray + diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h index f007181a..03b90c72 100644 --- a/src/cuda/AddGPUFieldToStorage.impl.h +++ b/src/cuda/AddGPUFieldToStorage.impl.h @@ -21,6 +21,7 @@ #pragma once +#include "cuda/FieldCopy.h" namespace walberla { namespace cuda { diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt index a4c149c3..83db2151 100644 --- a/src/cuda/CMakeLists.txt +++ b/src/cuda/CMakeLists.txt @@ -4,6 +4,6 @@ # ################################################################################################### -waLBerla_add_module( DEPENDS core communication domain_decomposition field stencil BUILD_ONLY_IF_FOUND CUDA ) +waLBerla_add_module( DEPENDS core communication domain_decomposition python_coupling field stencil BUILD_ONLY_IF_FOUND CUDA ) ################################################################################################### \ No newline at end of file diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.cpp index 8d2b51ed..fe7c3ed9 100644 --- a/src/cuda/GPUField.cpp +++ b/src/cuda/GPUField.cpp @@ -124,12 +124,23 @@ void GPUField<T>::getSlice(stencil::Direction d, CellInterval & ci, } } +template<typename T> +inline uint_t GPUField<T>::size( uint_t coord ) const +{ + switch (coord) { + case 0: return this->xSize(); + case 1: return this->ySize(); + case 2: return this->zSize(); + case 3: return this->fSize(); + default: WALBERLA_ASSERT(false); return 0; + } +} //******************************************************************************************************************* /*! True if sizes of all dimensions match *******************************************************************************************************************/ template<typename T> -inline bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const +bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const { return xSize() == other.xSize() && ySize() == other.ySize() && @@ -140,7 +151,7 @@ inline bool GPUField<T>::hasSameSize( const GPUField<T> & other ) const /*! True if allocation sizes of all dimensions match *******************************************************************************************************************/ template<typename T> -inline bool GPUField<T>::hasSameAllocSize( const GPUField<T> & other ) const +bool GPUField<T>::hasSameAllocSize( const GPUField<T> & other ) const { return xAllocSize() == other.xAllocSize() && yAllocSize() == other.yAllocSize() && diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h index 3153aba6..aa059b53 100755 --- a/src/cuda/GPUField.h +++ b/src/cuda/GPUField.h @@ -79,11 +79,27 @@ namespace cuda { inline uint_t zSize() const { return zSize_; } inline uint_t fSize() const { return fSize_; } inline uint_t size() const { return fSize() * xSize() * ySize() * zSize(); } + inline uint_t size( uint_t coord ) const; + + inline uint_t xSizeWithGhostLayer() const { return xSize() + uint_c(2)*nrOfGhostLayers_; } + inline uint_t ySizeWithGhostLayer() const { return ySize() + uint_c(2)*nrOfGhostLayers_; } + inline uint_t zSizeWithGhostLayer() const { return zSize() + uint_c(2)*nrOfGhostLayers_; } + inline uint_t sizeWithGhostLayer(uint_t i) const { return i==3 ? fSize_ : + size(i) + uint_c(2)*nrOfGhostLayers_; } cell_idx_t xOff() const { return cell_idx_c( nrOfGhostLayers_ ); } cell_idx_t yOff() const { return cell_idx_c( nrOfGhostLayers_ ); } cell_idx_t zOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + cell_idx_t xStride() const { return (layout_ == fzyx) ? cell_idx_t(1) : + cell_idx_c(fAllocSize()); } + cell_idx_t yStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize()) : + cell_idx_c(fAllocSize() * xAllocSize()); } + cell_idx_t zStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize() * yAllocSize()) : + cell_idx_c(fAllocSize() * xAllocSize() * yAllocSize()); } + cell_idx_t fStride() const { return (layout_ == fzyx) ? cell_idx_t(xAllocSize() * yAllocSize() * zAllocSize()) : + cell_idx_c(1); } + uint_t xAllocSize() const; uint_t yAllocSize() const; @@ -91,8 +107,8 @@ namespace cuda { uint_t fAllocSize() const; inline uint_t allocSize() const { return fAllocSize() * xAllocSize() * yAllocSize() * zAllocSize(); } - inline bool hasSameAllocSize( const GPUField<T> & other ) const; - inline bool hasSameSize( const GPUField<T> & other ) const; + bool hasSameAllocSize( const GPUField<T> & other ) const; + bool hasSameSize( const GPUField<T> & other ) const; GPUField<T> * cloneUninitialized() const; diff --git a/src/cuda/python/Exports.h b/src/cuda/python/Exports.h new file mode 100644 index 00000000..f4cb588a --- /dev/null +++ b/src/cuda/python/Exports.h @@ -0,0 +1,43 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldExport.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#ifdef WALBERLA_BUILD_WITH_PYTHON + + +#include <string> + +namespace walberla { +namespace cuda { + + + template<typename DataTypes > + void exportModuleToPython(); + + +} // namespace cuda +} // namespace walberla + +#include "Exports.impl.h" + + +#endif //WALBERLA_BUILD_WITH_PYTHON diff --git a/src/cuda/python/Exports.impl.h b/src/cuda/python/Exports.impl.h new file mode 100644 index 00000000..8cd714ed --- /dev/null +++ b/src/cuda/python/Exports.impl.h @@ -0,0 +1,285 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldExport.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +// Do not reorder includes - the include order is important +#include "python_coupling/PythonWrapper.h" + +#include "core/logging/Logging.h" +#include "cuda/GPUField.h" +#include "cuda/communication/GPUPackInfo.h" +#include "cuda/AddGPUFieldToStorage.h" + +#include "field/communication/UniformMPIDatatypeInfo.h" + +#include "field/AddToStorage.h" +#include "field/python/FieldExport.h" + +#include "python_coupling/helper/MplHelpers.h" +#include "python_coupling/helper/BoostPythonHelpers.h" + +#include <boost/type_traits/is_unsigned.hpp> + +#include <iostream> + +namespace walberla { +namespace cuda { + + + +namespace internal { + + //=================================================================================================================== + // + // Field export + // + //=================================================================================================================== + + template<typename GpuField_T> + uint64_t gpufield_ptr(const GpuField_T & gpuField) + { + return reinterpret_cast<uint64_t>(gpuField.pitchedPtr().ptr); + } + + template<typename GpuField_T> + std::string gpufield_dtypeStr(const GpuField_T & ) + { + return std::string(field::internal::PythonFormatString<typename GpuField_T::value_type>::get()); + } + + struct GpuFieldExporter + { + template< typename DataType> + void operator() ( DataType ) + { + typedef GPUField<DataType> GpuField_T; + + using namespace boost::python; + + class_<GpuField_T, shared_ptr<GpuField_T>, boost::noncopyable>( "GpuField", no_init ) + .add_property("layout", &field::internal::field_layout < GpuField_T > ) + .add_property("size", &field::internal::field_size < GpuField_T > ) + .add_property("sizeWithGhostLayers", &field::internal::field_sizeWithGhostLayer< GpuField_T > ) + .add_property("allocSize", &field::internal::field_allocSize < GpuField_T > ) + .add_property("strides", &field::internal::field_strides < GpuField_T > ) + .add_property("offsets", &field::internal::field_offsets < GpuField_T > ) + .add_property("ptr", &gpufield_ptr < GpuField_T > ) + .add_property("dtypeStr", &gpufield_dtypeStr < GpuField_T > ) + .def("swapDataPointers", &field::internal::field_swapDataPointers < GpuField_T > ) + .add_property("nrOfGhostLayers", &GpuField_T::nrOfGhostLayers ) + .def("cloneUninitialized", &GpuField_T::cloneUninitialized, return_value_policy<manage_new_object>()) + ; + + + using field::communication::PackInfo; + using communication::GPUPackInfo; + class_< GPUPackInfo<GpuField_T>, + shared_ptr< GPUPackInfo<GpuField_T> >, + bases<walberla::communication::UniformPackInfo>, + boost::noncopyable >( "GpuFieldPackInfo", no_init ); + + + using field::communication::UniformMPIDatatypeInfo; + class_< UniformMPIDatatypeInfo<GpuField_T>, + shared_ptr< UniformMPIDatatypeInfo<GpuField_T> >, + bases<walberla::communication::UniformMPIDatatypeInfo>, + boost::noncopyable >( "GpuFieldMPIDataTypeInfo", no_init ); + + } + }; + + + //=================================================================================================================== + // + // createField + // + //=================================================================================================================== + + class CreateFieldExporter + { + public: + CreateFieldExporter( uint_t xs, uint_t ys, uint_t zs, uint_t fs, uint_t gl, + Layout layout, const boost::python::object & type, bool usePitchedMem, + const shared_ptr<boost::python::object> & resultPointer ) + : xs_( xs ), ys_(ys), zs_(zs), fs_(fs), gl_(gl), + layout_( layout), type_( type ), usePitchedMem_( usePitchedMem ) , resultPointer_( resultPointer ) + {} + + template< typename T> + void operator() ( T ) + { + using namespace boost::python; + + if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() ) ) + { + *resultPointer_ = object( make_shared< GPUField<T> >( xs_,ys_,zs_, fs_, gl_, layout_, usePitchedMem_ ) ); + } + } + + private: + uint_t xs_; + uint_t ys_; + uint_t zs_; + uint_t fs_; + uint_t gl_; + Layout layout_; + boost::python::object type_; + bool usePitchedMem_; + shared_ptr<boost::python::object> resultPointer_; + }; + + template<typename DataTypes> + boost::python::object createPythonGpuField( boost::python::list size, + boost::python::object type, + uint_t ghostLayers, + Layout layout, + bool usePitchedMem) + { + using namespace boost::python; + uint_t xSize = extract<uint_t> ( size[0] ); + uint_t ySize = extract<uint_t> ( size[1] ); + uint_t zSize = extract<uint_t> ( size[2] ); + uint_t sizeLen = uint_c( len( size ) ); + uint_t fSize = 1; + if ( sizeLen == 4 ) + fSize = extract<uint_t> ( size[3] ); + + if ( ! PyType_Check( type.ptr() ) ) { + PyErr_SetString( PyExc_RuntimeError, "Invalid 'type' parameter"); + throw error_already_set(); + } + + auto result = make_shared<boost::python::object>(); + CreateFieldExporter exporter( xSize,ySize, zSize, fSize, ghostLayers, layout, type, usePitchedMem, result ); + boost::mpl::for_each< DataTypes > ( exporter ); + + if ( *result == object() ) + { + PyErr_SetString( PyExc_ValueError, "Cannot create field of this type"); + throw error_already_set(); + } + else { + return *result; + } + } + + + //=================================================================================================================== + // + // addToStorage + // + //=================================================================================================================== + + class AddToStorageExporter + { + public: + AddToStorageExporter( const shared_ptr<StructuredBlockStorage> & blocks, + const std::string & name, uint_t fs, uint_t gl, Layout layout, + const boost::python::object & type, + bool usePitchedMem ) + : blocks_( blocks ), name_( name ), fs_( fs ), + gl_(gl),layout_( layout), type_( type ), usePitchedMem_(usePitchedMem), found_(false) + {} + + template< typename T> + void operator() ( T ) + { + if( python_coupling::isCppEqualToPythonType<T>( (PyTypeObject *)type_.ptr() ) ) + { + WALBERLA_ASSERT(!found_); + addGPUFieldToStorage<GPUField<T> >(blocks_, name_, fs_, layout_, gl_, usePitchedMem_); + found_ = true; + } + } + + bool successful() const { return found_; } + private: + shared_ptr< StructuredBlockStorage > blocks_; + std::string name_; + uint_t fs_; + uint_t gl_; + Layout layout_; + boost::python::object type_; + bool usePitchedMem_; + bool found_; + }; + + template<typename DataTypes> + void addToStorage( const shared_ptr<StructuredBlockStorage> & blocks, const std::string & name, + boost::python::object type, uint_t fs, uint_t gl, Layout layout, bool usePitchedMem ) + { + using namespace boost::python; + + if ( ! PyType_Check( type.ptr() ) ) { + PyErr_SetString( PyExc_RuntimeError, "Invalid 'type' parameter"); + throw error_already_set(); + } + + auto result = make_shared<boost::python::object>(); + AddToStorageExporter exporter( blocks, name, fs, gl, layout, type, usePitchedMem ); + boost::mpl::for_each<DataTypes>( boost::ref(exporter) ); + + if ( ! exporter.successful() ) { + PyErr_SetString( PyExc_ValueError, "Adding Field failed."); + throw error_already_set(); + } + } + + + +} // namespace internal + + + + +template<typename FieldTypes > +void exportModuleToPython() +{ + python_coupling::ModuleScope fieldModule( "cuda" ); + + using namespace boost::python; + + boost::mpl::for_each<FieldTypes>( internal::GpuFieldExporter() ); + + def( "createGpuField", &internal::createPythonGpuField<FieldTypes>, ( ( arg("size") ), + ( arg("type") ), + ( arg("ghostLayers") = uint_t(1) ), + ( arg("layout") = field::zyxf), + ( arg("usePitchedMem") = true ) ) ); + + + def( "addGpuFieldToStorage", &internal::addToStorage<FieldTypes>, ( ( arg("blocks") ), + ( arg("name") ), + ( arg("type") ), + ( arg("fSize") = 1 ), + ( arg("ghostLayers") = uint_t(1) ), + ( arg("layout") = field::zyxf ), + ( arg("usePitchedMem") = object() ) ) ); + +} + + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/waLBerlaDefinitions.in.h b/src/waLBerlaDefinitions.in.h index 9964b39f..dc8a5ce6 100644 --- a/src/waLBerlaDefinitions.in.h +++ b/src/waLBerlaDefinitions.in.h @@ -29,6 +29,8 @@ #cmakedefine WALBERLA_BUILD_WITH_FFT +#cmakedefine WALBERLA_BUILD_WITH_CUDA + #cmakedefine WALBERLA_BUFFER_DEBUG #cmakedefine WALBERLA_THREAD_SAFE_LOGGING -- GitLab