Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
hyteg
hyteg
Commits
8bbf638f
Commit
8bbf638f
authored
May 04, 2022
by
wagnandr
Browse files
Merge branch 'master' into wagnandr/stokes-eg
parents
8886f91f
adba0cff
Pipeline
#39555
failed with stages
in 10 minutes and 27 seconds
Changes
21
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
8bbf638f
...
...
@@ -79,9 +79,9 @@ variables:
intel_
19
_serial
:
intel_
20
_serial
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
variables
:
WALBERLA_BUILD_WITH_MPI
:
"
OFF"
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
...
...
@@ -93,9 +93,9 @@ intel_19_serial:
-
docker
-
intel
intel_
19
_mpionly
:
intel_
20
_mpionly
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
variables
:
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
only
:
...
...
@@ -105,9 +105,9 @@ intel_19_mpionly:
-
docker
-
intel
intel_
19
_serial_dbg
:
intel_
20
_serial_dbg
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
variables
:
WALBERLA_BUILD_WITH_MPI
:
"
OFF"
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
...
...
@@ -117,9 +117,9 @@ intel_19_serial_dbg:
-
docker
-
intel
intel_
19
_mpionly_dbg_eigen_petsc-complex_trilinos
:
intel_
20
_mpionly_dbg_eigen_petsc-complex_trilinos
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
variables
:
CMAKE_BUILD_TYPE
:
"
DebugOptimized"
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
...
...
@@ -131,9 +131,9 @@ intel_19_mpionly_dbg_eigen_petsc-complex_trilinos:
-
docker
-
intel
intel_
19
_mpionly_dbg_sp
:
intel_
20
_mpionly_dbg_sp
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
variables
:
CMAKE_BUILD_TYPE
:
"
DebugOptimized"
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
...
...
@@ -144,9 +144,9 @@ intel_19_mpionly_dbg_sp:
-
docker
-
intel
intel_
19
_mpionly_eigen_petsc_trilinos
:
intel_
20
_mpionly_eigen_petsc_trilinos
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
variables
:
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
HYTEG_BUILD_WITH_PETSC
:
"
ON"
...
...
@@ -157,9 +157,9 @@ intel_19_mpionly_eigen_petsc_trilinos:
-
docker
-
intel
intel_
19
_mpionly_eigen_petsc_trilinos_no_werror
:
intel_
20
_mpionly_eigen_petsc_trilinos_no_werror
:
extends
:
.build_template
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
stage
:
no_werror
variables
:
WALBERLA_BUILD_WITH_OPENMP
:
"
OFF"
...
...
@@ -1552,7 +1552,7 @@ benchmark_build_time:
-
cd $CI_PROJECT_DIR/
-
cat BuildTiming.txt
-
python3 $CI_PROJECT_DIR/data/scripts/upload.py
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/gcc:
9
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/gcc:
11
tags
:
-
docker-benchmark
variables
:
...
...
@@ -1560,11 +1560,6 @@ benchmark_build_time:
benchmark_ClangBuildAnalyzer
:
script
:
-
apt-get update --fix-missing
-
apt-get -y install apt-transport-https ca-certificates gnupg software-properties-common wget
-
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
-
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
-
apt-get -y install cmake ninja-build
-
cmake --version
-
ccache --version
-
mpirun --version
...
...
@@ -1586,7 +1581,7 @@ benchmark_ClangBuildAnalyzer:
-
ninja hyteg
-
ClangBuildAnalyzer --stop src CBA
-
ClangBuildAnalyzer --analyze CBA
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/clang:
9
.0
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/clang:
13
.0
tags
:
-
docker-benchmark
variables
:
...
...
@@ -1661,14 +1656,14 @@ benchmark_ClangBuildAnalyzer:
needs
:
[
]
stage
:
benchmark
benchmark_intel
19
:
benchmark_intel
20
:
<<
:
*benchmark_definition
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
19
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/intel:
20
benchmark_gcc
9
:
benchmark_gcc
11
:
<<
:
*benchmark_definition
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/gcc:
9
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/gcc:
11
benchmark_clang
8
:
benchmark_clang
13
:
<<
:
*benchmark_definition
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
\ No newline at end of file
image
:
i10git.cs.fau.de:5005/walberla/buildenvs/clang:13.0
\ No newline at end of file
apps/adaptiveRefinement.cpp
View file @
8bbf638f
This diff is collapsed.
Click to expand it.
data/param/adaptiveRefinement.prm
View file @
8bbf638f
...
...
@@ -3,25 +3,37 @@ Parameters
// spatial dimension of domain
dim 2;
// domain shape (0=square/cube, 1=annulus/sphericalShell)
shape
1
;
shape
0
;
// initial mesh (n3 only used for cube)
n1
5
;
n2
2
;
n1
1
;
n2
1
;
n3 1;
// diffusion coefficient
alpha 10; // control slope of the "jump", 5 <= alpha <= 35
beta 1; // control height of the "jump", 1 <= beta <= 10
// analytic solution
// for shape=1 these parameters control the "jump" in the diffusion coefficient:
// alpha: control slope of the "jump", 5 <= alpha <= 35
// beta: control height of the "jump", 1 <= beta <= 10
// for shape=0 they control the peak in the analytic solution
// alpha: control the slope of the peak
// beta: unused
alpha 15; //
beta 1; //
// adaptive refinement
n_refinements 4;
proportion_of_elements_refined_per_step 0.2;
// adaptive refinement:
// In each step, all elements where the error is greater than 0.5*err_p
// will be refined, where err_p is the specified percentile over all errors.
// The iteration stops when the resulting mesh exceeds the given maximum of allowed elements.
n_refinements 60; // number of refinement steps
percentile 0.01; // minimum proportion of elements to refine in each step [0,1]
n_el_max 8200; // max number of macro elements
// linear solver (
cg
)
microlevel
2
;
n_iterations 10
000
;
tolerance 1e-
12
;
// linear solver (
GMG
)
microlevel
1
;
n_iterations 10;
tolerance 1e-
6
;
// vtk
vtkOutput 1;
// misc
vtkName new_anal_2D_ada;
loadbalancing 1;
writeDomainPartitioning 1;
}
\ No newline at end of file
src/hyteg/adaptiverefinement/CMakeLists.txt
View file @
8bbf638f
...
...
@@ -8,6 +8,7 @@ target_sources( hyteg
refine_cell.hpp
simplex.hpp
mesh.hpp
simplexFactory.cpp
simplexFactory.cpp
loadbalancing.cpp
)
src/hyteg/adaptiverefinement/loadbalancing.cpp
0 → 100644
View file @
8bbf638f
/*
* Copyright (c) 2022 Benjamin Mann
*
* This file is part of HyTeG
* (see https://i10git.cs.fau.de/hyteg/hyteg).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
<core/Format.hpp>
#include
<core/logging/all.h>
#include
<core/mpi/Broadcast.h>
#include
<core/mpi/Reduce.h>
#include
<numeric>
#include
<utility>
#include
<vector>
#include
"simplexData.hpp"
namespace
hyteg
{
namespace
adaptiveRefinement
{
/* apply loadbalancing directly on our datastructures */
void
loadbalancing
(
std
::
vector
<
VertexData
>&
vtxs
,
std
::
vector
<
EdgeData
>&
edges
,
std
::
vector
<
FaceData
>&
faces
,
std
::
vector
<
CellData
>&
cells
,
const
uint_t
&
n_processes
)
{
// roundrobin
uint_t
i
=
0
;
for
(
auto
&
vtx
:
vtxs
)
{
vtx
.
setTargetRank
(
i
%
n_processes
);
++
i
;
}
for
(
auto
&
edge
:
edges
)
{
edge
.
setTargetRank
(
i
%
n_processes
);
++
i
;
}
for
(
auto
&
face
:
faces
)
{
face
.
setTargetRank
(
i
%
n_processes
);
++
i
;
}
for
(
auto
&
cell
:
cells
)
{
cell
.
setTargetRank
(
i
%
n_processes
);
++
i
;
}
}
void
loadbalancing
(
std
::
vector
<
VertexData
>&
vtxs
,
std
::
vector
<
EdgeData
>&
edges
,
std
::
vector
<
FaceData
>&
faces
,
std
::
vector
<
CellData
>&
cells
,
const
std
::
vector
<
Neighborhood
>&
nbrHood
,
const
uint_t
&
n_processes
,
const
uint_t
&
rank
)
{
using
PT
=
PrimitiveType
;
constexpr
std
::
array
<
PT
,
ALL
>
VEFC
{
VTX
,
EDGE
,
FACE
,
CELL
};
constexpr
std
::
array
<
PT
,
ALL
>
CFEV
{
CELL
,
FACE
,
EDGE
,
VTX
};
const
PT
VOL
=
(
cells
.
size
()
==
0
)
?
FACE
:
CELL
;
// number of primitives of each type
std
::
array
<
uint_t
,
ALL
+
1
>
n_prim
;
n_prim
[
VTX
]
=
vtxs
.
size
();
n_prim
[
EDGE
]
=
edges
.
size
();
n_prim
[
FACE
]
=
faces
.
size
();
n_prim
[
CELL
]
=
cells
.
size
();
n_prim
[
ALL
]
=
n_prim
[
VTX
]
+
n_prim
[
EDGE
]
+
n_prim
[
FACE
]
+
n_prim
[
CELL
];
// first Primitive ID for each primitive type
std
::
array
<
uint_t
,
ALL
+
1
>
id0
{};
for
(
auto
pt
:
VEFC
)
{
id0
[
pt
+
1
]
=
id0
[
pt
]
+
n_prim
[
pt
];
}
/* We assume that the elements in the input vectors are ordered by
PrimitiveID and that for each vertex v, edge e, face f and cell c it holds
id_v < id_e < id_f < id_c
*/
uint_t
check_id
=
0
;
auto
check
=
[
&
](
PrimitiveID
id
)
{
if
(
id
.
getID
()
!=
check_id
)
{
WALBERLA_ABORT
(
"Wrong numbering of primitives!"
);
}
++
check_id
;
};
for
(
auto
&
p
:
vtxs
)
{
check
(
p
.
getPrimitiveID
()
);
}
for
(
auto
&
p
:
edges
)
{
check
(
p
.
getPrimitiveID
()
);
}
for
(
auto
&
p
:
faces
)
{
check
(
p
.
getPrimitiveID
()
);
}
for
(
auto
&
p
:
cells
)
{
check
(
p
.
getPrimitiveID
()
);
}
// we only use this algorithm if there are more volume elements than processes
if
(
n_prim
[
VOL
]
<
n_processes
||
n_processes
<
2
)
{
return
loadbalancing
(
vtxs
,
edges
,
faces
,
cells
,
n_processes
);
}
// get primitive type of id
auto
primitiveType
=
[
&
](
uint_t
id
)
->
PT
{
PT
pt
=
VTX
;
while
(
pt
<
ALL
&&
id
>=
id0
[
pt
+
1
]
)
{
pt
=
PT
(
pt
+
1
);
}
return
pt
;
};
// unassign everything
for
(
auto
&
p
:
vtxs
)
{
p
.
setTargetRank
(
n_processes
);
}
for
(
auto
&
p
:
edges
)
{
p
.
setTargetRank
(
n_processes
);
}
for
(
auto
&
p
:
faces
)
{
p
.
setTargetRank
(
n_processes
);
}
for
(
auto
&
p
:
cells
)
{
p
.
setTargetRank
(
n_processes
);
}
// max number of primitives on one rank for each primitive type
std
::
array
<
uint_t
,
ALL
>
n_max
;
// distributed id range for each primitive type
std
::
array
<
uint_t
,
ALL
>
begin
,
end
;
for
(
auto
pt
:
VEFC
)
{
auto
n_min
=
n_prim
[
pt
]
/
n_processes
;
auto
mod
=
n_prim
[
pt
]
%
n_processes
;
begin
[
pt
]
=
id0
[
pt
]
+
n_min
*
rank
+
(
(
rank
<
mod
)
?
rank
:
mod
);
end
[
pt
]
=
begin
[
pt
]
+
n_min
+
(
(
rank
<
mod
)
?
1
:
0
);
// we only prescribe a maximum for volume elements
if
(
pt
==
VOL
)
{
n_max
[
pt
]
=
n_min
+
(
(
0
<
mod
)
?
1
:
0
);
}
else
{
n_max
[
pt
]
=
n_prim
[
pt
];
}
}
// compute neighboring volume primitives of all primitives
std
::
vector
<
std
::
vector
<
uint_t
>
>
nbrVolumes
(
n_prim
[
ALL
]
);
for
(
uint_t
idx
=
0
;
idx
<
nbrHood
.
size
();
++
idx
)
{
uint_t
i
=
id0
[
VOL
]
+
idx
;
for
(
PT
pt
:
VEFC
)
{
for
(
uint_t
j
:
nbrHood
[
idx
][
pt
]
)
{
nbrVolumes
[
j
].
push_back
(
i
);
}
}
}
// which primitives are currently assigned to a cluster
std
::
vector
<
bool
>
isAssigned
(
n_prim
[
ALL
]
+
1
,
false
);
// how many primitives of each type are assigned to each process
std
::
vector
<
std
::
array
<
uint_t
,
ALL
+
1
>
>
n_assigned
(
n_processes
+
1
,
std
::
array
<
uint_t
,
ALL
+
1
>
{}
);
// volume elements assigned to each cluster
std
::
vector
<
std
::
vector
<
uint_t
>
>
volume_elements
(
n_processes
);
// assign primitive i to cluster k
auto
assign
=
[
&
](
uint_t
i
,
uint_t
k
)
->
bool
{
if
(
isAssigned
[
i
]
)
{
return
false
;
}
PT
pt
=
primitiveType
(
i
);
uint_t
idx
=
i
-
id0
[
pt
];
if
(
pt
==
VTX
)
{
vtxs
[
idx
].
setTargetRank
(
k
);
}
else
if
(
pt
==
EDGE
)
{
edges
[
idx
].
setTargetRank
(
k
);
}
else
if
(
pt
==
FACE
)
{
faces
[
idx
].
setTargetRank
(
k
);
}
else
if
(
pt
==
CELL
)
{
cells
[
idx
].
setTargetRank
(
k
);
}
else
{
return
false
;
}
// mark as assigned
++
n_assigned
[
k
][
pt
];
++
n_assigned
[
k
][
ALL
];
++
n_assigned
[
n_processes
][
pt
];
++
n_assigned
[
n_processes
][
ALL
];
isAssigned
[
i
]
=
true
;
if
(
pt
==
VOL
)
{
volume_elements
[
k
].
push_back
(
i
);
}
return
true
;
};
// unassign primitive i from its current cluster
auto
unassign
=
[
&
](
uint_t
i
)
->
uint_t
{
if
(
!
isAssigned
[
i
]
)
{
return
n_processes
;
}
PT
pt
=
primitiveType
(
i
);
uint_t
idx
=
i
-
id0
[
pt
];
uint_t
k
=
n_processes
;
if
(
pt
==
VTX
)
{
k
=
vtxs
[
idx
].
getTargetRank
();
vtxs
[
idx
].
setTargetRank
(
n_processes
);
}
else
if
(
pt
==
EDGE
)
{
k
=
edges
[
idx
].
getTargetRank
();
edges
[
idx
].
setTargetRank
(
n_processes
);
}
else
if
(
pt
==
FACE
)
{
k
=
faces
[
idx
].
getTargetRank
();
faces
[
idx
].
setTargetRank
(
n_processes
);
}
else
if
(
pt
==
CELL
)
{
k
=
cells
[
idx
].
getTargetRank
();
cells
[
idx
].
setTargetRank
(
n_processes
);
}
if
(
k
==
n_processes
)
{
return
n_processes
;
}
// mark as unassigned
--
n_assigned
[
k
][
pt
];
--
n_assigned
[
k
][
ALL
];
--
n_assigned
[
n_processes
][
pt
];
--
n_assigned
[
n_processes
][
ALL
];
isAssigned
[
i
]
=
false
;
if
(
pt
==
VOL
)
{
volume_elements
[
k
].
erase
(
std
::
find
(
volume_elements
[
k
].
begin
(),
volume_elements
[
k
].
end
(),
i
)
);
}
return
k
;
};
// which rank is primitive i currently assigned to
auto
assigned_to
=
[
&
](
uint_t
i
)
->
uint_t
{
if
(
!
isAssigned
[
i
]
)
{
return
n_processes
;
}
PT
pt
=
primitiveType
(
i
);
uint_t
idx
=
i
-
id0
[
pt
];
if
(
pt
==
VTX
)
{
return
vtxs
[
idx
].
getTargetRank
();
}
else
if
(
pt
==
EDGE
)
{
return
edges
[
idx
].
getTargetRank
();
}
else
if
(
pt
==
FACE
)
{
return
faces
[
idx
].
getTargetRank
();
}
else
if
(
pt
==
CELL
)
{
return
cells
[
idx
].
getTargetRank
();
}
else
{
return
n_processes
;
}
};
// compute potential volume of cluster built around element i
auto
predict_volume
=
[
&
](
uint_t
i
)
->
uint_t
{
std
::
vector
<
uint_t
>
Q
,
Q_new
;
std
::
vector
<
bool
>
visited
(
n_prim
[
ALL
],
false
);
uint_t
v
=
0
;
Q_new
.
push_back
(
i
);
visited
[
i
]
=
true
;
// breadth first search to compute the number of free elements before hitting another cluster
while
(
!
Q_new
.
empty
()
)
{
v
+=
Q_new
.
size
();
std
::
swap
(
Q
,
Q_new
);
Q_new
.
clear
();
for
(
auto
j
:
Q
)
{
for
(
auto
n
:
nbrVolumes
[
j
]
)
{
if
(
!
visited
[
n
]
)
{
if
(
isAssigned
[
n
]
)
{
return
v
;
}
Q_new
.
push_back
(
n
);
visited
[
n
]
=
true
;
}
}
}
}
return
v
;
};
// IDs of initial elements
std
::
vector
<
uint_t
>
initID
(
n_processes
);
// select initial elements at random
std
::
iota
(
initID
.
begin
(),
initID
.
end
(),
id0
[
VOL
]
);
std
::
fill
(
isAssigned
.
begin
()
+
int64_t
(
id0
[
VOL
]
),
isAssigned
.
begin
()
+
int64_t
(
id0
[
VOL
]
+
n_processes
),
true
);
// loop over all clusters k and choose initial element maximizing potential cluster size
for
(
uint_t
k
=
0
;
k
<
n_processes
;
++
k
)
{
uint_t
max_i
=
0
;
// max_i predict_volume(i)
uint_t
i_max
=
n_prim
[
ALL
];
// arg max_i predict_volume(i)
isAssigned
[
initID
[
k
]]
=
false
;
// loop over all volume elements
for
(
uint_t
i
=
begin
[
VOL
];
i
<
end
[
VOL
];
++
i
)
{
// skip elements that are occupied by another cluster
if
(
isAssigned
[
i
]
)
continue
;
auto
v_i
=
predict_volume
(
i
);
// find maximum over i
if
(
v_i
>
max_i
)
{
max_i
=
v_i
;
i_max
=
i
;
}
}
// global maximum
auto
global_max
=
walberla
::
mpi
::
allReduce
(
max_i
,
walberla
::
mpi
::
MAX
);
if
(
global_max
>
max_i
)
{
i_max
=
n_prim
[
ALL
];
}
i_max
=
walberla
::
mpi
::
allReduce
(
i_max
,
walberla
::
mpi
::
MIN
);
WALBERLA_ASSERT
(
i_max
<
n_prim
[
ALL
]
);
// apply changes
initID
[
k
]
=
i_max
;
isAssigned
[
i_max
]
=
true
;
}
// reset flag
std
::
fill
(
isAssigned
.
begin
(),
isAssigned
.
end
(),
false
);
// add initial elements
for
(
uint_t
k
=
0
;
k
<
n_processes
;
++
k
)
{
assign
(
initID
[
k
],
k
);
}
// force of attraction between a cluster and its elements
std
::
vector
<
uint_t
>
attraction
(
n_processes
+
1
,
n_processes
);