Commit 8bbf638f authored by wagnandr's avatar wagnandr
Browse files

Merge branch 'master' into wagnandr/stokes-eg

parents 8886f91f adba0cff
Pipeline #39555 failed with stages
in 10 minutes and 27 seconds
...@@ -79,9 +79,9 @@ variables: ...@@ -79,9 +79,9 @@ variables:
intel_19_serial: intel_20_serial:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
variables: variables:
WALBERLA_BUILD_WITH_MPI: "OFF" WALBERLA_BUILD_WITH_MPI: "OFF"
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
...@@ -93,9 +93,9 @@ intel_19_serial: ...@@ -93,9 +93,9 @@ intel_19_serial:
- docker - docker
- intel - intel
intel_19_mpionly: intel_20_mpionly:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
variables: variables:
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
only: only:
...@@ -105,9 +105,9 @@ intel_19_mpionly: ...@@ -105,9 +105,9 @@ intel_19_mpionly:
- docker - docker
- intel - intel
intel_19_serial_dbg: intel_20_serial_dbg:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
variables: variables:
WALBERLA_BUILD_WITH_MPI: "OFF" WALBERLA_BUILD_WITH_MPI: "OFF"
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
...@@ -117,9 +117,9 @@ intel_19_serial_dbg: ...@@ -117,9 +117,9 @@ intel_19_serial_dbg:
- docker - docker
- intel - intel
intel_19_mpionly_dbg_eigen_petsc-complex_trilinos: intel_20_mpionly_dbg_eigen_petsc-complex_trilinos:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
variables: variables:
CMAKE_BUILD_TYPE: "DebugOptimized" CMAKE_BUILD_TYPE: "DebugOptimized"
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
...@@ -131,9 +131,9 @@ intel_19_mpionly_dbg_eigen_petsc-complex_trilinos: ...@@ -131,9 +131,9 @@ intel_19_mpionly_dbg_eigen_petsc-complex_trilinos:
- docker - docker
- intel - intel
intel_19_mpionly_dbg_sp: intel_20_mpionly_dbg_sp:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
variables: variables:
CMAKE_BUILD_TYPE: "DebugOptimized" CMAKE_BUILD_TYPE: "DebugOptimized"
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
...@@ -144,9 +144,9 @@ intel_19_mpionly_dbg_sp: ...@@ -144,9 +144,9 @@ intel_19_mpionly_dbg_sp:
- docker - docker
- intel - intel
intel_19_mpionly_eigen_petsc_trilinos: intel_20_mpionly_eigen_petsc_trilinos:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
variables: variables:
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
HYTEG_BUILD_WITH_PETSC: "ON" HYTEG_BUILD_WITH_PETSC: "ON"
...@@ -157,9 +157,9 @@ intel_19_mpionly_eigen_petsc_trilinos: ...@@ -157,9 +157,9 @@ intel_19_mpionly_eigen_petsc_trilinos:
- docker - docker
- intel - intel
intel_19_mpionly_eigen_petsc_trilinos_no_werror: intel_20_mpionly_eigen_petsc_trilinos_no_werror:
extends: .build_template extends: .build_template
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
stage: no_werror stage: no_werror
variables: variables:
WALBERLA_BUILD_WITH_OPENMP: "OFF" WALBERLA_BUILD_WITH_OPENMP: "OFF"
...@@ -1552,7 +1552,7 @@ benchmark_build_time: ...@@ -1552,7 +1552,7 @@ benchmark_build_time:
- cd $CI_PROJECT_DIR/ - cd $CI_PROJECT_DIR/
- cat BuildTiming.txt - cat BuildTiming.txt
- python3 $CI_PROJECT_DIR/data/scripts/upload.py - python3 $CI_PROJECT_DIR/data/scripts/upload.py
image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
tags: tags:
- docker-benchmark - docker-benchmark
variables: variables:
...@@ -1560,11 +1560,6 @@ benchmark_build_time: ...@@ -1560,11 +1560,6 @@ benchmark_build_time:
benchmark_ClangBuildAnalyzer: benchmark_ClangBuildAnalyzer:
script: script:
- apt-get update --fix-missing
- apt-get -y install apt-transport-https ca-certificates gnupg software-properties-common wget
- wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
- apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
- apt-get -y install cmake ninja-build
- cmake --version - cmake --version
- ccache --version - ccache --version
- mpirun --version - mpirun --version
...@@ -1586,7 +1581,7 @@ benchmark_ClangBuildAnalyzer: ...@@ -1586,7 +1581,7 @@ benchmark_ClangBuildAnalyzer:
- ninja hyteg - ninja hyteg
- ClangBuildAnalyzer --stop src CBA - ClangBuildAnalyzer --stop src CBA
- ClangBuildAnalyzer --analyze CBA - ClangBuildAnalyzer --analyze CBA
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0 image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
tags: tags:
- docker-benchmark - docker-benchmark
variables: variables:
...@@ -1661,14 +1656,14 @@ benchmark_ClangBuildAnalyzer: ...@@ -1661,14 +1656,14 @@ benchmark_ClangBuildAnalyzer:
needs: [ ] needs: [ ]
stage: benchmark stage: benchmark
benchmark_intel19: benchmark_intel20:
<<: *benchmark_definition <<: *benchmark_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:20
benchmark_gcc9: benchmark_gcc11:
<<: *benchmark_definition <<: *benchmark_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:11
benchmark_clang8: benchmark_clang13:
<<: *benchmark_definition <<: *benchmark_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0 image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
\ No newline at end of file \ No newline at end of file
This diff is collapsed.
...@@ -3,25 +3,37 @@ Parameters ...@@ -3,25 +3,37 @@ Parameters
// spatial dimension of domain // spatial dimension of domain
dim 2; dim 2;
// domain shape (0=square/cube, 1=annulus/sphericalShell) // domain shape (0=square/cube, 1=annulus/sphericalShell)
shape 1; shape 0;
// initial mesh (n3 only used for cube) // initial mesh (n3 only used for cube)
n1 5; n1 1;
n2 2; n2 1;
n3 1; n3 1;
// diffusion coefficient // analytic solution
alpha 10; // control slope of the "jump", 5 <= alpha <= 35 // for shape=1 these parameters control the "jump" in the diffusion coefficient:
beta 1; // control height of the "jump", 1 <= beta <= 10 // alpha: control slope of the "jump", 5 <= alpha <= 35
// beta: control height of the "jump", 1 <= beta <= 10
// for shape=0 they control the peak in the analytic solution
// alpha: control the slope of the peak
// beta: unused
alpha 15; //
beta 1; //
// adaptive refinement // adaptive refinement:
n_refinements 4; // In each step, all elements where the error is greater than 0.5*err_p
proportion_of_elements_refined_per_step 0.2; // will be refined, where err_p is the specified percentile over all errors.
// The iteration stops when the resulting mesh exceeds the given maximum of allowed elements.
n_refinements 60; // number of refinement steps
percentile 0.01; // minimum proportion of elements to refine in each step [0,1]
n_el_max 8200; // max number of macro elements
// linear solver (cg) // linear solver (GMG)
microlevel 2; microlevel 1;
n_iterations 10000; n_iterations 10;
tolerance 1e-12; tolerance 1e-6;
// vtk // misc
vtkOutput 1; vtkName new_anal_2D_ada;
loadbalancing 1;
writeDomainPartitioning 1;
} }
\ No newline at end of file
...@@ -8,6 +8,7 @@ target_sources( hyteg ...@@ -8,6 +8,7 @@ target_sources( hyteg
refine_cell.hpp refine_cell.hpp
simplex.hpp simplex.hpp
mesh.hpp mesh.hpp
simplexFactory.cpp simplexFactory.cpp
loadbalancing.cpp
) )
<
/*
* Copyright (c) 2022 Benjamin Mann
*
* This file is part of HyTeG
* (see https://i10git.cs.fau.de/hyteg/hyteg).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <core/Format.hpp>
#include <core/logging/all.h>
#include <core/mpi/Broadcast.h>
#include <core/mpi/Reduce.h>
#include <numeric>
#include <utility>
#include <vector>
#include "simplexData.hpp"
namespace hyteg {
namespace adaptiveRefinement {
/* apply loadbalancing directly on our datastructures */
void loadbalancing( std::vector< VertexData >& vtxs,
std::vector< EdgeData >& edges,
std::vector< FaceData >& faces,
std::vector< CellData >& cells,
const uint_t& n_processes )
{
// roundrobin
uint_t i = 0;
for ( auto& vtx : vtxs )
{
vtx.setTargetRank( i % n_processes );
++i;
}
for ( auto& edge : edges )
{
edge.setTargetRank( i % n_processes );
++i;
}
for ( auto& face : faces )
{
face.setTargetRank( i % n_processes );
++i;
}
for ( auto& cell : cells )
{
cell.setTargetRank( i % n_processes );
++i;
}
}
void loadbalancing( std::vector< VertexData >& vtxs,
std::vector< EdgeData >& edges,
std::vector< FaceData >& faces,
std::vector< CellData >& cells,
const std::vector< Neighborhood >& nbrHood,
const uint_t& n_processes,
const uint_t& rank )
{
using PT = PrimitiveType;
constexpr std::array< PT, ALL > VEFC{ VTX, EDGE, FACE, CELL };
constexpr std::array< PT, ALL > CFEV{ CELL, FACE, EDGE, VTX };
const PT VOL = ( cells.size() == 0 ) ? FACE : CELL;
// number of primitives of each type
std::array< uint_t, ALL + 1 > n_prim;
n_prim[VTX] = vtxs.size();
n_prim[EDGE] = edges.size();
n_prim[FACE] = faces.size();
n_prim[CELL] = cells.size();
n_prim[ALL] = n_prim[VTX] + n_prim[EDGE] + n_prim[FACE] + n_prim[CELL];
// first Primitive ID for each primitive type
std::array< uint_t, ALL + 1 > id0{};
for ( auto pt : VEFC )
{
id0[pt + 1] = id0[pt] + n_prim[pt];
}
/* We assume that the elements in the input vectors are ordered by
PrimitiveID and that for each vertex v, edge e, face f and cell c it holds
id_v < id_e < id_f < id_c
*/
uint_t check_id = 0;
auto check = [&]( PrimitiveID id ) {
if ( id.getID() != check_id )
{
WALBERLA_ABORT( "Wrong numbering of primitives!" );
}
++check_id;
};
for ( auto& p : vtxs )
{
check( p.getPrimitiveID() );
}
for ( auto& p : edges )
{
check( p.getPrimitiveID() );
}
for ( auto& p : faces )
{
check( p.getPrimitiveID() );
}
for ( auto& p : cells )
{
check( p.getPrimitiveID() );
}
// we only use this algorithm if there are more volume elements than processes
if ( n_prim[VOL] < n_processes || n_processes < 2 )
{
return loadbalancing( vtxs, edges, faces, cells, n_processes );
}
// get primitive type of id
auto primitiveType = [&]( uint_t id ) -> PT {
PT pt = VTX;
while ( pt < ALL && id >= id0[pt + 1] )
{
pt = PT( pt + 1 );
}
return pt;
};
// unassign everything
for ( auto& p : vtxs )
{
p.setTargetRank( n_processes );
}
for ( auto& p : edges )
{
p.setTargetRank( n_processes );
}
for ( auto& p : faces )
{
p.setTargetRank( n_processes );
}
for ( auto& p : cells )
{
p.setTargetRank( n_processes );
}
// max number of primitives on one rank for each primitive type
std::array< uint_t, ALL > n_max;
// distributed id range for each primitive type
std::array< uint_t, ALL > begin, end;
for ( auto pt : VEFC )
{
auto n_min = n_prim[pt] / n_processes;
auto mod = n_prim[pt] % n_processes;
begin[pt] = id0[pt] + n_min * rank + ( ( rank < mod ) ? rank : mod );
end[pt] = begin[pt] + n_min + ( ( rank < mod ) ? 1 : 0 );
// we only prescribe a maximum for volume elements
if ( pt == VOL )
{
n_max[pt] = n_min + ( ( 0 < mod ) ? 1 : 0 );
}
else
{
n_max[pt] = n_prim[pt];
}
}
// compute neighboring volume primitives of all primitives
std::vector< std::vector< uint_t > > nbrVolumes( n_prim[ALL] );
for ( uint_t idx = 0; idx < nbrHood.size(); ++idx )
{
uint_t i = id0[VOL] + idx;
for ( PT pt : VEFC )
{
for ( uint_t j : nbrHood[idx][pt] )
{
nbrVolumes[j].push_back( i );
}
}
}
// which primitives are currently assigned to a cluster
std::vector< bool > isAssigned( n_prim[ALL] + 1, false );
// how many primitives of each type are assigned to each process
std::vector< std::array< uint_t, ALL + 1 > > n_assigned( n_processes + 1, std::array< uint_t, ALL + 1 >{} );
// volume elements assigned to each cluster
std::vector< std::vector< uint_t > > volume_elements( n_processes );
// assign primitive i to cluster k
auto assign = [&]( uint_t i, uint_t k ) -> bool {
if ( isAssigned[i] )
{
return false;
}
PT pt = primitiveType( i );
uint_t idx = i - id0[pt];
if ( pt == VTX )
{
vtxs[idx].setTargetRank( k );
}
else if ( pt == EDGE )
{
edges[idx].setTargetRank( k );
}
else if ( pt == FACE )
{
faces[idx].setTargetRank( k );
}
else if ( pt == CELL )
{
cells[idx].setTargetRank( k );
}
else
{
return false;
}
// mark as assigned
++n_assigned[k][pt];
++n_assigned[k][ALL];
++n_assigned[n_processes][pt];
++n_assigned[n_processes][ALL];
isAssigned[i] = true;
if ( pt == VOL )
{
volume_elements[k].push_back( i );
}
return true;
};
// unassign primitive i from its current cluster
auto unassign = [&]( uint_t i ) -> uint_t {
if ( !isAssigned[i] )
{
return n_processes;
}
PT pt = primitiveType( i );
uint_t idx = i - id0[pt];
uint_t k = n_processes;
if ( pt == VTX )
{
k = vtxs[idx].getTargetRank();
vtxs[idx].setTargetRank( n_processes );
}
else if ( pt == EDGE )
{
k = edges[idx].getTargetRank();
edges[idx].setTargetRank( n_processes );
}
else if ( pt == FACE )
{
k = faces[idx].getTargetRank();
faces[idx].setTargetRank( n_processes );
}
else if ( pt == CELL )
{
k = cells[idx].getTargetRank();
cells[idx].setTargetRank( n_processes );
}
if ( k == n_processes )
{
return n_processes;
}
// mark as unassigned
--n_assigned[k][pt];
--n_assigned[k][ALL];
--n_assigned[n_processes][pt];
--n_assigned[n_processes][ALL];
isAssigned[i] = false;
if ( pt == VOL )
{
volume_elements[k].erase( std::find( volume_elements[k].begin(), volume_elements[k].end(), i ) );
}
return k;
};
// which rank is primitive i currently assigned to
auto assigned_to = [&]( uint_t i ) -> uint_t {
if ( !isAssigned[i] )
{
return n_processes;
}
PT pt = primitiveType( i );
uint_t idx = i - id0[pt];
if ( pt == VTX )
{
return vtxs[idx].getTargetRank();
}
else if ( pt == EDGE )
{
return edges[idx].getTargetRank();
}
else if ( pt == FACE )
{
return faces[idx].getTargetRank();
}
else if ( pt == CELL )
{
return cells[idx].getTargetRank();
}
else
{
return n_processes;
}
};
// compute potential volume of cluster built around element i
auto predict_volume = [&]( uint_t i ) -> uint_t {
std::vector< uint_t > Q, Q_new;
std::vector< bool > visited( n_prim[ALL], false );
uint_t v = 0;
Q_new.push_back( i );
visited[i] = true;
// breadth first search to compute the number of free elements before hitting another cluster
while ( !Q_new.empty() )
{
v += Q_new.size();
std::swap( Q, Q_new );
Q_new.clear();
for ( auto j : Q )
{
for ( auto n : nbrVolumes[j] )
{
if ( !visited[n] )
{
if ( isAssigned[n] )
{
return v;
}
Q_new.push_back( n );
visited[n] = true;
}
}
}
}
return v;
};