GinkgoBlockSolver.hpp 10.5 KB
Newer Older
Marcel Koch's avatar
Marcel Koch committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#pragma once

#include <ginkgo/core/log/convergence.hpp>
#include <ginkgo/core/log/convergence_stream.hpp>
#include <ginkgo/core/matrix/block_matrix.hpp>
#include <ginkgo/core/solver/bicgstab.hpp>
#include <ginkgo/core/solver/gmres.hpp>
#include <ginkgo/core/stop/iteration.hpp>
#include <ginkgo/core/stop/residual_norm.hpp>
#include <hyteg/composites/petsc/P2P1TaylorHoodPetsc.hpp>

#include "hyteg/ginkgo/GinkgoCommunication.hpp"
#include "hyteg/ginkgo/GinkgoDirichletHandling.hpp"
#include "hyteg/ginkgo/GinkgoSparseMatrixProxy.hpp"
#include "hyteg/ginkgo/GinkgoVectorProxy.hpp"
#include "hyteg/solvers/Solver.hpp"

namespace hyteg {
void gatherIndices( std::vector< int32_t >&            velocityIndices,
                    std::vector< int32_t >&            pressureIndices,
                    const PrimitiveStorage&            storage,
                    uint_t                             level,
                    const P1StokesFunction< int32_t >& numerator )
{
   for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[0], level ) )
   {
      velocityIndices.push_back( dof.value() );
   }

   for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[1], level ) )
   {
      velocityIndices.push_back( dof.value() );
   }

   if ( storage.hasGlobalCells() )
   {
      for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[2], level ) )
      {
         velocityIndices.push_back( dof.value() );
      }
   }
   for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.p, level ) )
   {
      pressureIndices.push_back( dof.value() );
   }
}

void gatherIndices( std::vector< int32_t >&                  velocityIndices,
                    std::vector< int32_t >&                  pressureIndices,
                    const PrimitiveStorage&                  storage,
                    uint_t                                   level,
                    const P2P1TaylorHoodFunction< int32_t >& numerator )
{
   for ( auto dof :
         FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[0].getVertexDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[0].getEdgeDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   for ( auto dof :
         FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[1].getVertexDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[1].getEdgeDoFFunction(), level ) )
   {
      velocityIndices.push_back( dof.value() );
   }
   if ( storage.hasGlobalCells() )
   {
      for ( auto dof :
            FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[2].getVertexDoFFunction(), level ) )
      {
         velocityIndices.push_back( dof.value() );
      }
      for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[2].getEdgeDoFFunction(), level ) )
      {
         velocityIndices.push_back( dof.value() );
      }
   }
   for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.p, level ) )
   {
      pressureIndices.push_back( dof.value() );
   }
}


template < typename OperatorType >
class GinkgoBlockSolver : public Solver< OperatorType >
{
   using BlockPreconditioner_T = typename OperatorType::BlockPreconditioner_T;

 public:
   using FunctionType = typename OperatorType::srcType;
   using valueType    = typename FunctionType::valueType;

   using mtx   = gko::distributed::Matrix< valueType, int32_t >;
   using csr   = gko::matrix::Csr< valueType, int32_t >;
   using vec   = gko::distributed::Vector< valueType, int32_t >;
   using dense = gko::matrix::Dense< valueType >;

   explicit GinkgoBlockSolver( const std::shared_ptr< PrimitiveStorage >& storage,
                               const uint_t&                              level,
                               std::shared_ptr< const gko::Executor >     exec )
   : storage_( storage )
   , exec_( exec )
   , comm_( gko::mpi::communicator::create( storage->getSplitCommunicatorByPrimitiveDistribution() ) )
   , num_( "numerator", storage, level, level )
   {}

   void solve( const OperatorType&                   A,
               const typename OperatorType::srcType& x,
               const typename OperatorType::dstType& b,
               const walberla::uint_t                level ) override
   {
      const auto num_local_dofs  = numberOfLocalDoFs< typename FunctionType::Tag >( *storage_, level );
      const auto num_global_dofs = numberOfGlobalDoFs< typename FunctionType::Tag >( *storage_, level, comm_->get() );

      auto [start, end] = local_range( num_local_dofs, comm_ );
      auto part         = gko::share( gko::distributed::Partition<>::build_from_local_range( exec_, start, end, comm_ ) );

      // maybe called in parallel, thus need to keep it for empty processes
      num_.copyBoundaryConditionFromFunction( x );
      num_.enumerate( level );

      std::vector< int32_t > bcIndices;
      hyteg::petsc::applyDirichletBC( num_, bcIndices, level );
      std::sort( std::begin( bcIndices ), std::end( bcIndices ) );

      auto x_gko = vec::create( exec_, comm_, part, gko::dim< 2 >{ num_global_dofs, 1 }, gko::dim< 2 >{ num_local_dofs, 1 } );
      auto b_gko = vec::create( exec_, comm_, part, gko::dim< 2 >{ num_global_dofs, 1 }, gko::dim< 2 >{ num_local_dofs, 1 } );

      hyteg::petsc::createVectorFromFunction(
          x, num_, std::make_shared< GinkgoVectorProxy >( x_gko.get(), gko::dim< 2 >{ num_global_dofs, 1 }, part ), level, All );

      hyteg::petsc::createVectorFromFunction(
          b, num_, std::make_shared< GinkgoVectorProxy >( b_gko.get(), gko::dim< 2 >{ num_global_dofs, 1 }, part ), level, All );

      monolithic_matrix_ = gko::share( mtx::create( exec_, comm_ ) );

      auto proxy = std::make_shared< GinkgoSparseMatrixProxy< mtx > >(
          monolithic_matrix_.get(), gko::dim< 2 >{ num_global_dofs, num_global_dofs }, part );
      hyteg::petsc::createMatrix( A, num_, num_, proxy, level, All );
      proxy->finalize();

      auto dir_handler = std::make_unique< ZeroRowsDirichletHandler >( bcIndices, b_gko.get(), monolithic_matrix_, true );
      dir_handler->update_matrix();

      std::vector< int32_t > vIndices;
      std::vector< int32_t > pIndices;

      gatherIndices( vIndices, pIndices, *storage_, level, num_ );

      auto [p_v_min, p_v_max] = std::minmax_element( std::begin( vIndices ), std::end( vIndices ) );
      auto [p_p_min, p_p_max] = std::minmax_element( std::begin( pIndices ), std::end( pIndices ) );
      gko::span v_span( *p_v_min, *p_v_max + 1 );
      gko::span p_span( *p_p_min, *p_p_max + 1 );
      if ( p_span.begin != v_span.end )
      {
         WALBERLA_ABORT("Indices are NOT blocked: v" << v_span << ", p" << p_span)
      }

      auto b_mtx_v  = gko::share( monolithic_matrix_->get_local_diag()->create_submatrix( v_span, v_span ) );
      auto b_mtx_vp = gko::share( monolithic_matrix_->get_local_diag()->create_submatrix( v_span, p_span ) );
      auto b_mtx_pv = gko::share( monolithic_matrix_->get_local_diag()->create_submatrix( p_span, v_span ) );
      auto b_mtx_p  = gko::share( monolithic_matrix_->get_local_diag()->create_submatrix( p_span, p_span ) );
      matrix_ = gko::share(gko::matrix::BlockMatrix::create(exec_, {num_global_dofs, num_global_dofs}, {{b_mtx_v, b_mtx_vp}, {b_mtx_pv, b_mtx_p}}));

//      auto b_vec_b0 = gko::share( b_gko->get_local()->create_submatrix( v_span, 0 ) );
//      auto b_vec_b1 = gko::share( b_gko->get_local()->create_submatrix( p_span, 0 ) );
//      auto b_vec_x0 = gko::share( x_gko->get_local()->create_submatrix( v_span, 0 ) );
//      auto b_vec_x1 = gko::share( x_gko->get_local()->create_submatrix( p_span, 0 ) );
//
//      auto b_b_gko = gko::matrix::BlockMatrix::create(exec_, {num_global_dofs, num_global_dofs}, {{b_vec_b0}, {b_vec_b1}});
//      auto b_x_gko = gko::matrix::BlockMatrix::create(exec_, {num_global_dofs, num_global_dofs}, {{b_vec_x0}, {b_vec_x1}});

      solver_ = gko::solver::Bicgstab< valueType >::build()
                    .with_criteria( gko::share( gko::stop::ResidualNorm<>::build()
                                                    .with_baseline( gko::stop::mode::initial_resnorm )
                                                    .with_reduction_factor( 1e-30 )
                                                    .on( exec_ ) ),
                                    gko::share( gko::stop::ResidualNorm<>::build()
                                                    .with_baseline( gko::stop::mode::absolute )
                                                    .with_reduction_factor( 1e-12 )
                                                    .on( exec_ ) ),
                                    gko::share( gko::stop::Iteration::build().with_max_iters( 15000 ).on( exec_ ) ) )
                    .on( exec_ )
                    ->generate( matrix_ );

      auto log     = gko::share( gko::log::Convergence< valueType >::create( exec_ ) );
      auto factory = gko::clone( solver_->get_stop_criterion_factory() );
      factory->add_logger( log );
      solver_->set_stop_criterion_factory( gko::share( factory ) );

      auto rhs = dir_handler->get_rhs( b_gko.get(), x_gko.get() );
      auto x0  = dir_handler->get_initial_guess( x_gko.get() );
      solver_->apply( rhs, x0 );
      dir_handler->update_solution( x0 );

      hyteg::petsc::createFunctionFromVector(
          x, num_, std::make_shared< GinkgoVectorProxy >( x0, gko::dim< 2 >{ num_global_dofs, 1 }, part ), level, All );

      {
         WALBERLA_LOG_INFO_ON_ROOT(
             "[Ginkgo CG]" << ( !log->has_converged() ? " NOT " : " " ) << "converged after " << log->get_num_iterations()
                           << " iterations,  residual norm: "
                           << exec_->copy_val_to_host(
                                  gko::as< gko::matrix::Dense< valueType > >( log->get_residual_norm() )->get_const_values() ) );
      }
   }

 private:
   std::shared_ptr< PrimitiveStorage >                   storage_;
   std::shared_ptr< const gko::Executor >                exec_;
   std::shared_ptr< gko::mpi::communicator >             comm_;
   std::unique_ptr< gko::solver::Bicgstab< valueType > > solver_;

   typename OperatorType::srcType::template FunctionType< int > num_;

   std::shared_ptr< gko::matrix::BlockMatrix > matrix_;
   std::shared_ptr< mtx >                      monolithic_matrix_;
};

} // namespace hyteg