//====================================================================================================================== // // This file is part of waLBerla. waLBerla is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public // License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. // // waLBerla is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // //! \file ParallelStreams.h //! \ingroup cuda //! \author Martin Bauer <martin.bauer@fau.de> // //====================================================================================================================== #pragma once #include "cuda/ErrorChecking.h" #include "cuda/CudaRAII.h" #include <vector> namespace walberla { namespace cuda { class ParallelStreams; class ParallelSection { public: ~ParallelSection(); void run( const std::function<void( cudaStream_t )> &f ); private: friend class ParallelStreams; ParallelSection( ParallelStreams *parent, cudaStream_t mainStream ); void synchronize(); cudaStream_t stream(); void next(); ParallelStreams * parent_; cudaStream_t mainStream_; cudaEvent_t startEvent_; uint_t counter_; }; //******************************************************************************************************************* /*! * Helper class to run CUDA operations on parallel streams * * This class introduces "side streams" that overlap with one "main stream". In a parallel section, multiple * kernels (or other CUDA operations) are scheduled to the streams. The first "run" is scheduled on the main stream * all subsequent operations on the side streams. The passed priority affects only the side streams. When * the parallel section goes out of scope the side streams are synchronized to the main stream via CUDA events. * * Example: * * \code * ParallelStreams streams; * { * // new scope for the parallel section * ParallelSection sec = streams.parallelSection( mainCudaStream ); * sec.run([&] ( cudaStream_t sideStream ) { * // run something on the side stream * }); * // after the parallel section goes out of scope the side streams are synchronized to the main stream * } * * \endcode * */ //******************************************************************************************************************* class ParallelStreams { public: ParallelStreams( int priority = 0 ); ParallelSection parallelSection( cudaStream_t stream ); void setStreamPriority( int priority ); private: friend class ParallelSection; void ensureSize( uint_t size ); std::vector<StreamRAII> sideStreams_; std::vector<EventRAII> events_; EventRAII mainEvent_; int streamPriority_; }; } // namespace cuda } // namespace walberla