#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include "hyteg/ginkgo/GinkgoCommunication.hpp" #include "hyteg/ginkgo/GinkgoDirichletHandling.hpp" #include "hyteg/ginkgo/GinkgoSparseMatrixProxy.hpp" #include "hyteg/ginkgo/GinkgoVectorProxy.hpp" #include "hyteg/solvers/Solver.hpp" namespace hyteg { void gatherIndices( std::vector< int32_t >& velocityIndices, std::vector< int32_t >& pressureIndices, const PrimitiveStorage& storage, uint_t level, const P2P1TaylorHoodFunction< int32_t >& numerator ) { for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[0].getVertexDoFFunction(), level ) ) { velocityIndices.push_back( dof.value() ); } for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[0].getEdgeDoFFunction(), level ) ) { velocityIndices.push_back( dof.value() ); } for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[1].getVertexDoFFunction(), level ) ) { velocityIndices.push_back( dof.value() ); } for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[1].getEdgeDoFFunction(), level ) ) { velocityIndices.push_back( dof.value() ); } if ( storage.hasGlobalCells() ) { for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.uvw[2].getVertexDoFFunction(), level ) ) { velocityIndices.push_back( dof.value() ); } for ( auto dof : FunctionIterator< EdgeDoFFunction< int32_t > >( numerator.uvw[2].getEdgeDoFFunction(), level ) ) { velocityIndices.push_back( dof.value() ); } } for ( auto dof : FunctionIterator< vertexdof::VertexDoFFunction< int32_t > >( numerator.p, level ) ) { pressureIndices.push_back( dof.value() ); } } template < typename OperatorType > class GinkgoBlockSolver : public Solver< OperatorType > { using BlockPreconditioner_T = typename OperatorType::BlockPreconditioner_T; public: using FunctionType = typename OperatorType::srcType; using valueType = typename FunctionType::valueType; using mtx = gko::distributed::Matrix< valueType, int32_t >; using csr = gko::matrix::Csr< valueType, int32_t >; using vec = gko::distributed::Vector< valueType, int32_t >; using dense = gko::matrix::Dense< valueType >; explicit GinkgoBlockSolver( const std::shared_ptr< PrimitiveStorage >& storage, const uint_t& level, std::shared_ptr< const gko::Executor > exec ) : storage_( storage ) , solver_exec_( exec ) , host_exec_( exec->get_master() ) , comm_( gko::mpi::communicator::create( storage->getSplitCommunicatorByPrimitiveDistribution() ) ) , blockPreconditioner_( storage, level, level ) , num_( "numerator", storage, level, level ) , printInfo_( true ) {} void solve( const OperatorType& A, const typename OperatorType::srcType& x, const typename OperatorType::dstType& b, const walberla::uint_t level ) override { const auto num_local_dofs = numberOfLocalDoFs< typename FunctionType::Tag >( *storage_, level ); const auto num_global_dofs = numberOfGlobalDoFs< typename FunctionType::Tag >( *storage_, level, comm_->get() ); auto [start, end] = local_range( num_local_dofs, comm_ ); part_ = gko::share( gko::distributed::Partition<>::build_from_local_range( host_exec_, start, end, comm_ ) ); // maybe called in parallel, thus need to keep it for empty processes num_.copyBoundaryConditionFromFunction( x ); num_.enumerate( level ); std::vector< int32_t > bcIndices; hyteg::petsc::applyDirichletBC( num_, bcIndices, level ); std::sort( std::begin( bcIndices ), std::end( bcIndices ) ); auto x_gko = vec::create( host_exec_, comm_, part_, gko::dim< 2 >{ num_global_dofs, 1 }, gko::dim< 2 >{ num_local_dofs, 1 } ); auto b_gko = vec::create( host_exec_, comm_, part_, gko::dim< 2 >{ num_global_dofs, 1 }, gko::dim< 2 >{ num_local_dofs, 1 } ); hyteg::petsc::createVectorFromFunction( x, num_, std::make_shared< GinkgoVectorProxy >( x_gko.get(), gko::dim< 2 >{ num_global_dofs, 1 }, part_ ), level, All ); hyteg::petsc::createVectorFromFunction( b, num_, std::make_shared< GinkgoVectorProxy >( b_gko.get(), gko::dim< 2 >{ num_global_dofs, 1 }, part_ ), level, All ); host_monolithic_matrix_ = gko::share( mtx::create( host_exec_, comm_ ) ); { auto proxy = std::make_shared< GinkgoSparseMatrixProxy< mtx > >( host_monolithic_matrix_.get(), gko::dim< 2 >{ num_global_dofs, num_global_dofs }, part_ ); hyteg::petsc::createMatrix( A, num_, num_, proxy, level, All ); proxy->finalize(); } auto monolithic_preconditioner_ = gko::share( mtx::create( host_exec_, comm_ ) ); { auto proxy = std::make_shared< GinkgoSparseMatrixProxy< mtx > >( monolithic_preconditioner_.get(), gko::dim< 2 >{ num_global_dofs, num_global_dofs }, part_ ); hyteg::petsc::createMatrix( blockPreconditioner_, num_, num_, proxy, level, All ); proxy->finalize(); auto dir_handler = std::make_unique< ZeroRowsDirichletHandler >( bcIndices, b_gko.get(), monolithic_preconditioner_, true ); dir_handler->update_matrix(); } auto dir_handler = std::make_unique< ZeroRowsDirichletHandler >( bcIndices, b_gko.get(), host_monolithic_matrix_, true ); dir_handler->update_matrix(); auto rhs = dir_handler->get_rhs( b_gko.get(), x_gko.get() ); auto x0 = dir_handler->get_initial_guess( x_gko.get() ); std::vector< int32_t > vIndices; std::vector< int32_t > pIndices; gatherIndices( vIndices, pIndices, *storage_, level, num_ ); std::vector< int32_t > perm_vec; std::vector< int32_t > recv_sizes( comm_->size() ); std::vector< int32_t > recv_offsets( comm_->size() + 1 ); auto local_size = static_cast< int32_t >( vIndices.size() ); gko::mpi::gather( &local_size, 1, recv_sizes.data(), 1, 0, comm_ ); std::partial_sum( recv_sizes.begin(), recv_sizes.end(), recv_offsets.begin() + 1 ); auto global_v_size = static_cast< gko::size_type >( recv_offsets.back() ); perm_vec.resize( global_v_size ); gko::mpi::gather( vIndices.data(), local_size, perm_vec.data(), recv_sizes.data(), recv_offsets.data(), 0, comm_ ); local_size = static_cast< int32_t >( pIndices.size() ); gko::mpi::gather( &local_size, 1, recv_sizes.data(), 1, 0, comm_ ); std::partial_sum( recv_sizes.begin(), recv_sizes.end(), recv_offsets.begin() + 1 ); auto global_p_size = static_cast< gko::size_type >( recv_offsets.back() ); perm_vec.resize( global_v_size + global_p_size ); gko::mpi::gather( pIndices.data(), local_size, perm_vec.data() + global_v_size, recv_sizes.data(), recv_offsets.data(), 0, comm_ ); auto [p_v_min, p_v_max] = std::minmax_element( std::begin( vIndices ), std::end( vIndices ) ); auto [p_p_min, p_p_max] = std::minmax_element( std::begin( pIndices ), std::end( pIndices ) ); gko::span v_span( *p_v_min, *p_v_max + 1 ); gko::span p_span( *p_p_min, *p_p_max + 1 ); if ( p_span.begin != v_span.end ) { WALBERLA_ABORT( "Indices are NOT blocked: v" << v_span << ", p" << p_span ) } monolithic_matrix_ = gko::share( csr::create( solver_exec_ ) ); gko::as< mtx >( host_monolithic_matrix_ )->convert_to( monolithic_matrix_.get() ); gko::Array< gko::int32 > perm{ solver_exec_, perm_vec.begin(), perm_vec.end() }; if ( monolithic_matrix_->get_size() ) { monolithic_matrix_->permute( &perm ); } { auto device_monolithic_pre = gko::share( csr::create( solver_exec_ ) ); monolithic_preconditioner_->convert_to( device_monolithic_pre.get() ); if ( device_monolithic_pre->get_size() && device_monolithic_pre->get_size() ) { gko::span block_v_span{ 0, global_v_size }; gko::span block_p_span{ global_v_size, global_v_size + global_p_size }; device_monolithic_pre->permute( &perm ); auto b_pre_v = gko::share( gko::preconditioner::Jacobi< valueType >::build() .with_max_block_size( 1u ) .on( solver_exec_ ) ->generate( gko::share( device_monolithic_pre->create_submatrix( block_v_span, block_v_span ) ) ) ); auto b_pre_p = gko::share( gko::preconditioner::Jacobi< valueType >::build() .with_max_block_size( 1u ) .on( solver_exec_ ) ->generate( gko::share( device_monolithic_pre->create_submatrix( block_p_span, block_p_span ) ) ) ); auto b_pre_vp = gko::share( gko::matrix::Zero< valueType >::create( solver_exec_, gko::dim< 2 >{ global_v_size, global_p_size } ) ); auto b_pre_pv = gko::share( gko::matrix::Zero< valueType >::create( solver_exec_, gko::dim< 2 >{ global_p_size, global_v_size } ) ); block_preconditioner_ = gko::share( gko::matrix::BlockMatrix::create( solver_exec_, gko::dim< 2 >{ global_v_size + global_p_size, global_v_size + global_p_size }, std::vector< std::vector< std::shared_ptr< gko::LinOp > > >{ { b_pre_v, b_pre_vp }, { b_pre_pv, b_pre_p } }, std::vector< gko::span >{ block_v_span, block_p_span } ) ); } } auto log = gko::share( gko::log::Convergence< valueType >::create( solver_exec_ ) ); if ( monolithic_matrix_->get_size() ) { solver_ = gko::solver::Bicgstab< valueType >::build() .with_criteria( gko::share( gko::stop::ResidualNorm<>::build() .with_baseline( gko::stop::mode::initial_resnorm ) .with_reduction_factor( 1e-30 ) .on( solver_exec_ ) ), gko::share( gko::stop::ResidualNorm<>::build() .with_baseline( gko::stop::mode::absolute ) .with_reduction_factor( 1e-12 ) .on( solver_exec_ ) ), gko::share( gko::stop::Iteration::build().with_max_iters( 15000 ).on( solver_exec_ ) ) ) .with_generated_preconditioner( block_preconditioner_ ) .on( solver_exec_ ) ->generate( monolithic_matrix_ ); auto factory = gko::clone( solver_->get_stop_criterion_factory() ); factory->add_logger( log ); solver_->set_stop_criterion_factory( gko::share( factory ) ); } auto global_rhs = dense::create( host_exec_ ); auto global_x0 = dense::create( host_exec_ ); rhs->convert_to( gko::lend( global_rhs ) ); x0->convert_to( gko::lend( global_x0 ) ); if ( monolithic_matrix_->get_size() ) { global_rhs->row_permute( &perm ); global_x0->row_permute( &perm ); solver_->apply( gko::lend( global_rhs ), gko::lend( global_x0 ) ); global_x0->inverse_row_permute( &perm ); } gather_idxs_ = compute_gather_idxs( part_ ); scatter_global_vector( global_x0.get(), x0, gather_idxs_, comm_ ); dir_handler->update_solution( x0 ); hyteg::petsc::createFunctionFromVector( x, num_, std::make_shared< GinkgoVectorProxy >( x0, gko::dim< 2 >{ num_global_dofs, 1 }, part_ ), level, All ); if ( printInfo_ && comm_->rank() == 0 ) { WALBERLA_LOG_INFO_ON_ROOT( "[Ginkgo CG]" << ( !log->has_converged() ? " NOT " : " " ) << "converged after " << log->get_num_iterations() << " iterations, residual norm: " << solver_exec_->copy_val_to_host( gko::as< gko::matrix::Dense< valueType > >( log->get_residual_norm() )->get_const_values() ) ); } } void setPrintInfo( bool printInfo ) { printInfo_ = printInfo; } private: std::shared_ptr< PrimitiveStorage > storage_; std::shared_ptr< const gko::Executor > host_exec_; std::shared_ptr< const gko::Executor > solver_exec_; std::shared_ptr< gko::mpi::communicator > comm_; std::shared_ptr< gko::distributed::Partition< int32_t > > part_; std::unique_ptr< gko::solver::Bicgstab< valueType > > solver_; BlockPreconditioner_T blockPreconditioner_; typename OperatorType::srcType::template FunctionType< int > num_; std::shared_ptr< gko::LinOp > block_preconditioner_; std::shared_ptr< gko::matrix::BlockMatrix > host_block_preconditioner_; std::shared_ptr< csr > monolithic_matrix_; std::shared_ptr< mtx > host_monolithic_matrix_; std::vector< gko::Array< gko::distributed::global_index_type > > gather_idxs_; bool printInfo_ = false; }; } // namespace hyteg