ParallelStreams.cpp 3.39 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//======================================================================================================================
//
//  This file is part of waLBerla. waLBerla is free software: you can
//  redistribute it and/or modify it under the terms of the GNU General Public
//  License as published by the Free Software Foundation, either version 3 of
//  the License, or (at your option) any later version.
//
//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
//  for more details.
//
//  You should have received a copy of the GNU General Public License along
//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file ParallelStreams.cpp
//! \ingroup cuda
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================


#include "cuda/ParallelStreams.h"

namespace walberla {
namespace cuda {


   //! Opens a parallel section that forks from \p mainStream.
   //!
   //! \param parent      owner of the side streams and events this section indexes into
   //! \param mainStream  stream that serves as slot 0 of the section
   //!
   //! A start event is created and recorded on the main stream right away; next() makes each
   //! newly selected stream wait on that event.
   ParallelSection::ParallelSection(ParallelStreams * parent, cudaStream_t mainStream)
      : parent_( parent )
      , mainStream_( mainStream )
      , counter_( 0 )
   {
      // Create the fork marker first, then stamp it onto the main stream.
      WALBERLA_CUDA_CHECK( cudaEventCreate( &startEvent_ ) );
      WALBERLA_CUDA_CHECK( cudaEventRecord( startEvent_, mainStream_ ) );
   }

   //! Closes the section: joins the streams used so far via synchronize() and then
   //! destroys the start event created by the constructor.
   ParallelSection::~ParallelSection()
   {
      // The join must be enqueued before the start event goes away, because
      // synchronize() re-records that event as its last step.
      synchronize();

      WALBERLA_CUDA_CHECK( cudaEventDestroy( startEvent_ ) );
   }

   void ParallelSection::next()
   {
      if( counter_ > 0 ) {
         WALBERLA_CUDA_CHECK( cudaEventRecord( parent_->events_[counter_ - 1], parent_->sideStreams_[counter_ - 1] ) );
      }
      else {
         WALBERLA_CUDA_CHECK( cudaEventRecord( parent_->mainEvent_, mainStream_ ) );
      }
      ++counter_;

      parent_->ensureSize( counter_ );

      WALBERLA_CUDA_CHECK( cudaStreamWaitEvent( stream(), startEvent_, 0 ));
   }

   void ParallelSection::run(const std::function<void( cudaStream_t)> & f)
   {
      f( stream() );
      next();
   }

   void ParallelSection::synchronize()
   {
      for( uint_t i=0; i < counter_; ++i )
         for( uint_t j=0; j < counter_; ++j )
         {
            if( i == j )
               continue;

            auto & event  = i == 0 ? parent_->mainEvent_ : parent_->events_[i - 1];
            cudaStream_t stream = j == 0 ? mainStream_ : parent_->sideStreams_[j - 1];
            WALBERLA_CUDA_CHECK( cudaStreamWaitEvent( stream, event, 0 ));
         }

      WALBERLA_CUDA_CHECK( cudaEventRecord( startEvent_, mainStream_ ) );
   }

   //! Returns the stream of the current slot.
   //!
   //! \return the main stream while the counter is zero, otherwise the parent's side stream
   //!         at index counter - 1
   cudaStream_t ParallelSection::stream()
   {
      if( counter_ == 0 )
      {
         return mainStream_;
      }

      return parent_->sideStreams_[counter_ - 1];
   }



   //! Creates an empty stream pool.
   //!
   //! \param priority  priority handed to StreamRAII::newPriorityStream when ensureSize()
   //!                  creates a side stream
   ParallelStreams::ParallelStreams( int priority )
      : streamPriority_( priority )
   {
      // Nothing to do here: side streams and events are created on demand in ensureSize().
   }

   //! Starts a new parallel section on top of this stream pool.
   //!
   //! \param stream  main stream of the section (slot 0)
   //! \return a ParallelSection bound to this object and \p stream
   ParallelSection ParallelStreams::parallelSection( cudaStream_t stream )
   {
      return ParallelSection( this, stream );
   }

   //! Grows the pool until at least \p size side streams exist.
   //!
   //! \param size  required number of side streams
   //!
   //! One event is appended together with each new side stream. Streams are created with the
   //! currently configured priority. Nothing happens if the pool is already large enough.
   void ParallelStreams::ensureSize( uint_t size )
   {
      while( sideStreams_.size() < size )
      {
         sideStreams_.emplace_back( StreamRAII::newPriorityStream( streamPriority_ ) );
         events_.emplace_back( EventRAII() );
      }
   }

   //! Changes the priority used for side streams created from now on.
   //!
   //! \param priority  new priority value stored for later stream creation
   //!
   //! All cached side streams and their events are dropped, so ensureSize() creates
   //! replacements with the new priority when they are next needed.
   //! NOTE(review): a ParallelSection that is still alive indexes into these containers;
   //! presumably callers only change the priority between sections - confirm.
   void ParallelStreams::setStreamPriority( int priority )
   {
      streamPriority_ = priority;

      // Discard everything that was created with the previous priority.
      sideStreams_.clear();
      events_.clear();
   }



} // namespace cuda
} // namespace walberla