GinkgoBlockSolver.hpp 13.8 KB
Newer Older
1
2
#pragma once

Marcel Koch's avatar
Marcel Koch committed
3
#include <ginkgo/core/base/mpi.hpp>
4
5
6
#include <ginkgo/core/log/convergence.hpp>
#include <ginkgo/core/log/convergence_stream.hpp>
#include <ginkgo/core/matrix/block_matrix.hpp>
Marcel Koch's avatar
Marcel Koch committed
7
#include <ginkgo/core/matrix/identity.hpp>
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <ginkgo/core/matrix/zero.hpp>
#include <ginkgo/core/preconditioner/jacobi.hpp>
#include <ginkgo/core/solver/bicgstab.hpp>
#include <ginkgo/core/solver/gmres.hpp>
#include <ginkgo/core/stop/iteration.hpp>
#include <ginkgo/core/stop/residual_norm.hpp>
#include <hyteg/composites/petsc/P2P1TaylorHoodPetsc.hpp>

#include "hyteg/ginkgo/GinkgoCommunication.hpp"
#include "hyteg/ginkgo/GinkgoDirichletHandling.hpp"
#include "hyteg/ginkgo/GinkgoSparseMatrixProxy.hpp"
#include "hyteg/ginkgo/GinkgoVectorProxy.hpp"
#include "hyteg/solvers/Solver.hpp"

namespace hyteg {

void gatherIndices( std::vector< int32_t >&                  velocityIndices,
                    std::vector< int32_t >&                  pressureIndices,
                    const PrimitiveStorage&                  storage,
                    uint_t                                   level,
                    const P2P1TaylorHoodFunction< int32_t >& numerator )
{
   for ( auto dof :
         FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[0].getVertexDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[0].getEdgeDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   for ( auto dof :
         FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[1].getVertexDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[1].getEdgeDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   if ( storage.hasGlobalCells() )
   {
      for ( auto dof :
            FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[2].getVertexDoFFunction(), level ) )
      {
         velocityIndices.push_back( dof.value() );
      }
      for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[2].getEdgeDoFFunction(), level ) )
      {
         velocityIndices.push_back( dof.value() );
      }
   }
   for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.p, level ) )
   {
      pressureIndices.push_back( dof.value() );
   }
}

template < typename OperatorType >
class GinkgoBlockSolver : public Solver< OperatorType >
{
   using BlockPreconditioner_T = typename OperatorType::BlockPreconditioner_T;

 public:
   using FunctionType = typename OperatorType::srcType;
   using valueType    = typename FunctionType::valueType;

   using mtx   = gko::distributed::Matrix< valueType, int32_t >;
   using csr   = gko::matrix::Csr< valueType, int32_t >;
   using vec   = gko::distributed::Vector< valueType, int32_t >;
   using dense = gko::matrix::Dense< valueType >;

   explicit GinkgoBlockSolver( const std::shared_ptr< PrimitiveStorage >& storage,
                               const uint_t&                              level,
                               std::shared_ptr< const gko::Executor >     exec )
   : storage_( storage )
Marcel Koch's avatar
Marcel Koch committed
84
85
   , solver_exec_( exec )
   , host_exec_( exec->get_master() )
86
87
88
   , comm_( gko::mpi::communicator::create( storage->getSplitCommunicatorByPrimitiveDistribution() ) )
   , blockPreconditioner_( storage, level, level )
   , num_( "numerator", storage, level, level )
Marcel Koch's avatar
Marcel Koch committed
89
   , printInfo_( true )
90
91
92
93
94
95
96
97
98
99
100
   {}

   void solve( const OperatorType&                   A,
               const typename OperatorType::srcType& x,
               const typename OperatorType::dstType& b,
               const walberla::uint_t                level ) override
   {
      const auto num_local_dofs  = numberOfLocalDoFs< typename FunctionType::Tag >( *storage_, level );
      const auto num_global_dofs = numberOfGlobalDoFs< typename FunctionType::Tag >( *storage_, level, comm_->get() );

      auto [start, end] = local_range( num_local_dofs, comm_ );
Marcel Koch's avatar
Marcel Koch committed
101
      part_             = gko::share( gko::distributed::Partition<>::build_from_local_range( host_exec_, start, end, comm_ ) );
102
103
104
105
106
107
108
109
110

      // maybe called in parallel, thus need to keep it for empty processes
      num_.copyBoundaryConditionFromFunction( x );
      num_.enumerate( level );

      std::vector< int32_t > bcIndices;
      hyteg::petsc::applyDirichletBC( num_, bcIndices, level );
      std::sort( std::begin( bcIndices ), std::end( bcIndices ) );

Marcel Koch's avatar
Marcel Koch committed
111
112
113
114
      auto x_gko =
          vec::create( host_exec_, comm_, part_, gko::dim< 2 >{ num_global_dofs, 1 }, gko::dim< 2 >{ num_local_dofs, 1 } );
      auto b_gko =
          vec::create( host_exec_, comm_, part_, gko::dim< 2 >{ num_global_dofs, 1 }, gko::dim< 2 >{ num_local_dofs, 1 } );
115
116

      hyteg::petsc::createVectorFromFunction(
Marcel Koch's avatar
Marcel Koch committed
117
          x, num_, std::make_shared< GinkgoVectorProxy >( x_gko.get(), gko::dim< 2 >{ num_global_dofs, 1 }, part_ ), level, All );
118
119

      hyteg::petsc::createVectorFromFunction(
Marcel Koch's avatar
Marcel Koch committed
120
          b, num_, std::make_shared< GinkgoVectorProxy >( b_gko.get(), gko::dim< 2 >{ num_global_dofs, 1 }, part_ ), level, All );
121

Marcel Koch's avatar
Marcel Koch committed
122
      host_monolithic_matrix_ = gko::share( mtx::create( host_exec_, comm_ ) );
123
124
      {
         auto proxy = std::make_shared< GinkgoSparseMatrixProxy< mtx > >(
Marcel Koch's avatar
Marcel Koch committed
125
             host_monolithic_matrix_.get(), gko::dim< 2 >{ num_global_dofs, num_global_dofs }, part_ );
126
127
128
129
         hyteg::petsc::createMatrix( A, num_, num_, proxy, level, All );
         proxy->finalize();
      }

Marcel Koch's avatar
Marcel Koch committed
130
      auto monolithic_preconditioner_ = gko::share( mtx::create( host_exec_, comm_ ) );
131
132
      {
         auto proxy = std::make_shared< GinkgoSparseMatrixProxy< mtx > >(
Marcel Koch's avatar
Marcel Koch committed
133
             monolithic_preconditioner_.get(), gko::dim< 2 >{ num_global_dofs, num_global_dofs }, part_ );
134
135
136
137
138
139
140
         hyteg::petsc::createMatrix( blockPreconditioner_, num_, num_, proxy, level, All );
         proxy->finalize();
         auto dir_handler =
             std::make_unique< ZeroRowsDirichletHandler >( bcIndices, b_gko.get(), monolithic_preconditioner_, true );
         dir_handler->update_matrix();
      }

Marcel Koch's avatar
Marcel Koch committed
141
      auto dir_handler = std::make_unique< ZeroRowsDirichletHandler >( bcIndices, b_gko.get(), host_monolithic_matrix_, true );
142
      dir_handler->update_matrix();
Marcel Koch's avatar
Marcel Koch committed
143
144
      auto rhs = dir_handler->get_rhs( b_gko.get(), x_gko.get() );
      auto x0  = dir_handler->get_initial_guess( x_gko.get() );
145
146
147
148
149
150

      std::vector< int32_t > vIndices;
      std::vector< int32_t > pIndices;

      gatherIndices( vIndices, pIndices, *storage_, level, num_ );

Marcel Koch's avatar
Marcel Koch committed
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
      std::vector< int32_t > perm_vec;

      std::vector< int32_t > recv_sizes( comm_->size() );
      std::vector< int32_t > recv_offsets( comm_->size() + 1 );

      auto local_size = static_cast< int32_t >( vIndices.size() );
      gko::mpi::gather( &local_size, 1, recv_sizes.data(), 1, 0, comm_ );
      std::partial_sum( recv_sizes.begin(), recv_sizes.end(), recv_offsets.begin() + 1 );
      auto global_v_size = static_cast< gko::size_type >( recv_offsets.back() );
      perm_vec.resize( global_v_size );
      gko::mpi::gather( vIndices.data(), local_size, perm_vec.data(), recv_sizes.data(), recv_offsets.data(), 0, comm_ );

      local_size = static_cast< int32_t >( pIndices.size() );
      gko::mpi::gather( &local_size, 1, recv_sizes.data(), 1, 0, comm_ );
      std::partial_sum( recv_sizes.begin(), recv_sizes.end(), recv_offsets.begin() + 1 );
      auto global_p_size = static_cast< gko::size_type >( recv_offsets.back() );
      perm_vec.resize( global_v_size + global_p_size );
      gko::mpi::gather(
          pIndices.data(), local_size, perm_vec.data() + global_v_size, recv_sizes.data(), recv_offsets.data(), 0, comm_ );

171
172
173
174
175
176
177
178
179
      auto [p_v_min, p_v_max] = std::minmax_element( std::begin( vIndices ), std::end( vIndices ) );
      auto [p_p_min, p_p_max] = std::minmax_element( std::begin( pIndices ), std::end( pIndices ) );
      gko::span v_span( *p_v_min, *p_v_max + 1 );
      gko::span p_span( *p_p_min, *p_p_max + 1 );
      if ( p_span.begin != v_span.end )
      {
         WALBERLA_ABORT( "Indices are NOT blocked: v" << v_span << ", p" << p_span )
      }

Marcel Koch's avatar
Marcel Koch committed
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
      monolithic_matrix_ = gko::share( csr::create( solver_exec_ ) );
      gko::as< mtx >( host_monolithic_matrix_ )->convert_to( monolithic_matrix_.get() );

      gko::Array< gko::int32 > perm{ solver_exec_, perm_vec.begin(), perm_vec.end() };
      if ( monolithic_matrix_->get_size() )
      {
         monolithic_matrix_->permute( &perm );
      }

      {
         auto device_monolithic_pre = gko::share( csr::create( solver_exec_ ) );
         monolithic_preconditioner_->convert_to( device_monolithic_pre.get() );
         if ( device_monolithic_pre->get_size() && device_monolithic_pre->get_size() )
         {
            gko::span block_v_span{ 0, global_v_size };
            gko::span block_p_span{ global_v_size, global_v_size + global_p_size };

            device_monolithic_pre->permute( &perm );

            auto b_pre_v = gko::share(
                gko::preconditioner::Jacobi< valueType >::build()
                    .with_max_block_size( 1u )
                    .on( solver_exec_ )
                    ->generate( gko::share( device_monolithic_pre->create_submatrix( block_v_span, block_v_span ) ) ) );
            auto b_pre_p = gko::share(
                gko::preconditioner::Jacobi< valueType >::build()
                    .with_max_block_size( 1u )
                    .on( solver_exec_ )
                    ->generate( gko::share( device_monolithic_pre->create_submatrix( block_p_span, block_p_span ) ) ) );

            auto b_pre_vp = gko::share(
                gko::matrix::Zero< valueType >::create( solver_exec_, gko::dim< 2 >{ global_v_size, global_p_size } ) );
            auto b_pre_pv = gko::share(
                gko::matrix::Zero< valueType >::create( solver_exec_, gko::dim< 2 >{ global_p_size, global_v_size } ) );

            block_preconditioner_ = gko::share( gko::matrix::BlockMatrix::create(
                solver_exec_,
                gko::dim< 2 >{ global_v_size + global_p_size, global_v_size + global_p_size },
                std::vector< std::vector< std::shared_ptr< gko::LinOp > > >{ { b_pre_v, b_pre_vp }, { b_pre_pv, b_pre_p } },
                std::vector< gko::span >{ block_v_span, block_p_span } ) );
         }
      }

      auto log = gko::share( gko::log::Convergence< valueType >::create( solver_exec_ ) );
      if ( monolithic_matrix_->get_size() )
      {
         solver_ = gko::solver::Bicgstab< valueType >::build()
                       .with_criteria( gko::share( gko::stop::ResidualNorm<>::build()
                                                       .with_baseline( gko::stop::mode::initial_resnorm )
                                                       .with_reduction_factor( 1e-30 )
                                                       .on( solver_exec_ ) ),
                                       gko::share( gko::stop::ResidualNorm<>::build()
                                                       .with_baseline( gko::stop::mode::absolute )
                                                       .with_reduction_factor( 1e-12 )
                                                       .on( solver_exec_ ) ),
                                       gko::share( gko::stop::Iteration::build().with_max_iters( 15000 ).on( solver_exec_ ) ) )
                       .with_generated_preconditioner( block_preconditioner_ )
                       .on( solver_exec_ )
                       ->generate( monolithic_matrix_ );

         auto factory = gko::clone( solver_->get_stop_criterion_factory() );
         factory->add_logger( log );
         solver_->set_stop_criterion_factory( gko::share( factory ) );
      }

      auto global_rhs = dense::create( host_exec_ );
      auto global_x0  = dense::create( host_exec_ );
      rhs->convert_to( gko::lend( global_rhs ) );
      x0->convert_to( gko::lend( global_x0 ) );

      if ( monolithic_matrix_->get_size() )
      {
         global_rhs->row_permute( &perm );
         global_x0->row_permute( &perm );
         solver_->apply( gko::lend( global_rhs ), gko::lend( global_x0 ) );
         global_x0->inverse_row_permute( &perm );
      }
      gather_idxs_ = compute_gather_idxs( part_ );
      scatter_global_vector( global_x0.get(), x0, gather_idxs_, comm_ );
259
260
261
262

      dir_handler->update_solution( x0 );

      hyteg::petsc::createFunctionFromVector(
Marcel Koch's avatar
Marcel Koch committed
263
          x, num_, std::make_shared< GinkgoVectorProxy >( x0, gko::dim< 2 >{ num_global_dofs, 1 }, part_ ), level, All );
264

Marcel Koch's avatar
Marcel Koch committed
265
      if ( printInfo_ && comm_->rank() == 0 )
266
267
268
269
      {
         WALBERLA_LOG_INFO_ON_ROOT(
             "[Ginkgo CG]" << ( !log->has_converged() ? " NOT " : " " ) << "converged after " << log->get_num_iterations()
                           << " iterations,  residual norm: "
Marcel Koch's avatar
Marcel Koch committed
270
                           << solver_exec_->copy_val_to_host(
271
272
273
274
                                  gko::as< gko::matrix::Dense< valueType > >( log->get_residual_norm() )->get_const_values() ) );
      }
   }

Marcel Koch's avatar
Marcel Koch committed
275
276
   void setPrintInfo( bool printInfo ) { printInfo_ = printInfo; }

277
 private:
Marcel Koch's avatar
Marcel Koch committed
278
279
280
281
282
283
284
285
   std::shared_ptr< PrimitiveStorage > storage_;

   std::shared_ptr< const gko::Executor > host_exec_;
   std::shared_ptr< const gko::Executor > solver_exec_;

   std::shared_ptr< gko::mpi::communicator >                 comm_;
   std::shared_ptr< gko::distributed::Partition< int32_t > > part_;

286
287
288
289
290
   std::unique_ptr< gko::solver::Bicgstab< valueType > > solver_;

   BlockPreconditioner_T                                        blockPreconditioner_;
   typename OperatorType::srcType::template FunctionType< int > num_;

Marcel Koch's avatar
Marcel Koch committed
291
292
293
294
295
296
297
298
299
   std::shared_ptr< gko::LinOp >               block_preconditioner_;
   std::shared_ptr< gko::matrix::BlockMatrix > host_block_preconditioner_;

   std::shared_ptr< csr > monolithic_matrix_;
   std::shared_ptr< mtx > host_monolithic_matrix_;

   std::vector< gko::Array< gko::distributed::global_index_type > > gather_idxs_;

   bool printInfo_ = false;
300
301
302
};

} // namespace hyteg