ParallelStreams.cpp 3.39 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
//======================================================================================================================
//
//  This file is part of waLBerla. waLBerla is free software: you can
//  redistribute it and/or modify it under the terms of the GNU General Public
//  License as published by the Free Software Foundation, either version 3 of
//  the License, or (at your option) any later version.
//
//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
//  for more details.
//
//  You should have received a copy of the GNU General Public License along
//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file ParallelStreams.cpp
//! \ingroup cuda
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================


#include "cuda/ParallelStreams.h"

namespace walberla {
namespace cuda {


   ParallelSection::ParallelSection(ParallelStreams * parent, cudaStream_t mainStream)
     : parent_( parent ), mainStream_( mainStream ), counter_( 0 )
   {
      WALBERLA_CUDA_CHECK( cudaEventCreate(&startEvent_) );
      WALBERLA_CUDA_CHECK( cudaEventRecord( startEvent_, mainStream_ ) );
   }

   //! Ends the section.
   //!
   //! synchronize() is called first, which enqueues the mutual event waits between the streams used
   //! by this section; afterwards the start event created in the constructor is destroyed.
   ParallelSection::~ParallelSection()
   {
      synchronize();
      WALBERLA_CUDA_CHECK( cudaEventDestroy(startEvent_) );
   }

   void ParallelSection::next()
   {
      if( counter_ > 0 ) {
         WALBERLA_CUDA_CHECK( cudaEventRecord( parent_->events_[counter_ - 1], parent_->sideStreams_[counter_ - 1] ) );
      }
      else {
         WALBERLA_CUDA_CHECK( cudaEventRecord( parent_->mainEvent_, mainStream_ ) );
      }
      ++counter_;

      parent_->ensureSize( counter_ );

      WALBERLA_CUDA_CHECK( cudaStreamWaitEvent( stream(), startEvent_, 0 ));
   }

   void ParallelSection::run(const std::function<void( cudaStream_t)> & f)
   {
      f( stream() );
      next();
   }

   void ParallelSection::synchronize()
   {
      for( uint_t i=0; i < counter_; ++i )
         for( uint_t j=0; j < counter_; ++j )
         {
            if( i == j )
               continue;

            auto & event  = i == 0 ? parent_->mainEvent_ : parent_->events_[i - 1];
            cudaStream_t stream = j == 0 ? mainStream_ : parent_->sideStreams_[j - 1];
            WALBERLA_CUDA_CHECK( cudaStreamWaitEvent( stream, event, 0 ));
         }

      WALBERLA_CUDA_CHECK( cudaEventRecord( startEvent_, mainStream_ ) );
   }

   //! Returns the stream that is currently active in this section.
   //!
   //! \return the main stream while next() has not been called yet, otherwise the parent's side
   //!         stream at index counter_ - 1
   cudaStream_t ParallelSection::stream()
   {
      if( counter_ == 0 )
         return mainStream_;

      return parent_->sideStreams_[counter_ - 1];
   }



   ParallelStreams::ParallelStreams( int priority )
           : streamPriority_( priority )
   {
   }

   //! Starts a new parallel section that has this object as parent and \p stream as its main stream.
   //!
   //! \param stream  main stream of the new section
   //! \return the newly constructed section
   ParallelSection ParallelStreams::parallelSection( cudaStream_t stream )
   {
      // Returned as a temporary on purpose: the section's destructor destroys a CUDA event.
      return ParallelSection{ this, stream };
   }

   //! Grows the pool until it holds at least \p size side streams, adding one event per new stream.
   //!
   //! Streams and events that already exist are kept; nothing is added when the pool is large enough.
   //!
   //! \param size  minimum number of side streams required
   void ParallelStreams::ensureSize( uint_t size )
   {
      while( sideStreams_.size() < size )
      {
         // Streams and events are appended in lockstep, so both are addressed by the same index.
         sideStreams_.emplace_back( StreamRAII::newPriorityStream( streamPriority_ ) );
         events_.emplace_back( EventRAII() );
      }
   }

   //! Changes the priority used for side streams created from now on.
   //!
   //! All side streams and events currently held are dropped, so later calls to ensureSize()
   //! create new ones with the new priority.
   //!
   //! NOTE(review): an open ParallelSection indexes into these containers through its parent
   //! pointer - presumably this must only be called while no section is active; confirm with callers.
   //!
   //! \param priority  new stream priority
   void ParallelStreams::setStreamPriority( int priority )
   {
      streamPriority_ = priority;
      sideStreams_.clear();
      events_.clear();
   }



} // namespace cuda
} // namespace walberla